diff --git "a/checkpoint-25250/trainer_state.json" "b/checkpoint-25250/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/checkpoint-25250/trainer_state.json"
@@ -0,0 +1,176783 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.2525,
+  "eval_steps": 500,
+  "global_step": 25250,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1e-05,
+      "grad_norm": 1.4574347149106512,
+      "learning_rate": 3e-06,
+      "loss": 10.8576,
+      "step": 1
+    },
+    {
+      "epoch": 2e-05,
+      "grad_norm": 1.4482443114713344,
+      "learning_rate": 6e-06,
+      "loss": 10.8575,
+      "step": 2
+    },
+    {
+      "epoch": 3e-05,
+      "grad_norm": 1.4642003089092823,
+      "learning_rate": 9e-06,
+      "loss": 10.8576,
+      "step": 3
+    },
+    {
+      "epoch": 4e-05,
+      "grad_norm": 1.4485584072048276,
+      "learning_rate": 1.2e-05,
+      "loss": 10.8569,
+      "step": 4
+    },
+    {
+      "epoch": 5e-05,
+      "grad_norm": 1.4574321657516995,
+      "learning_rate": 1.5e-05,
+      "loss": 10.8527,
+      "step": 5
+    },
+    {
+      "epoch": 6e-05,
+      "grad_norm": 1.4585537094370684,
+      "learning_rate": 1.8e-05,
+      "loss": 10.8518,
+      "step": 6
+    },
+    {
+      "epoch": 7e-05,
+      "grad_norm": 1.4183780170798466,
+      "learning_rate": 2.1000000000000002e-05,
+      "loss": 10.8383,
+      "step": 7
+    },
+    {
+      "epoch": 8e-05,
+      "grad_norm": 1.286272643239374,
+      "learning_rate": 2.4e-05,
+      "loss": 10.8119,
+      "step": 8
+    },
+    {
+      "epoch": 9e-05,
+      "grad_norm": 1.246364249616181,
+      "learning_rate": 2.7e-05,
+      "loss": 10.8063,
+      "step": 9
+    },
+    {
+      "epoch": 0.0001,
+      "grad_norm": 1.2300728857390288,
+      "learning_rate": 3e-05,
+      "loss": 10.7913,
+      "step": 10
+    },
+    {
+      "epoch": 0.00011,
+      "grad_norm": 1.1639358472437353,
+      "learning_rate": 3.2999999999999996e-05,
+      "loss": 10.7756,
+      "step": 11
+    },
+    {
+      "epoch": 0.00012,
+      "grad_norm": 1.1455069336623074,
+      "learning_rate": 3.6e-05,
+      "loss": 10.7622,
+      "step": 12
+    },
+    {
+      "epoch": 0.00013,
+      "grad_norm": 1.104711768149659,
+      "learning_rate": 3.9e-05,
+      "loss": 10.7415,
+      "step": 13
+    },
+    {
+      "epoch": 0.00014,
+      "grad_norm": 1.0916409179063882,
+      "learning_rate": 4.2000000000000004e-05,
+      "loss": 10.7296,
+      "step": 14
+    },
+    {
+      "epoch": 0.00015,
+      "grad_norm": 1.0723152562842644,
+      "learning_rate": 4.4999999999999996e-05,
+      "loss": 10.7183,
+      "step": 15
+    },
+    {
+      "epoch": 0.00016,
+      "grad_norm": 1.0297043497697835,
+      "learning_rate": 4.8e-05,
+      "loss": 10.7,
+      "step": 16
+    },
+    {
+      "epoch": 0.00017,
+      "grad_norm": 0.9882210863590011,
+      "learning_rate": 5.1000000000000006e-05,
+      "loss": 10.6838,
+      "step": 17
+    },
+    {
+      "epoch": 0.00018,
+      "grad_norm": 0.966791538351231,
+      "learning_rate": 5.4e-05,
+      "loss": 10.6659,
+      "step": 18
+    },
+    {
+      "epoch": 0.00019,
+      "grad_norm": 0.9358769725124266,
+      "learning_rate": 5.7e-05,
+      "loss": 10.6509,
+      "step": 19
+    },
+    {
+      "epoch": 0.0002,
+      "grad_norm": 0.9313245306892226,
+      "learning_rate": 6e-05,
+      "loss": 10.638,
+      "step": 20
+    },
+    {
+      "epoch": 0.00021,
+      "grad_norm": 0.9056933112957348,
+      "learning_rate": 6.3e-05,
+      "loss": 10.6235,
+      "step": 21
+    },
+    {
+      "epoch": 0.00022,
+      "grad_norm": 0.9013277325687112,
+      "learning_rate": 6.599999999999999e-05,
+      "loss": 10.6069,
+      "step": 22
+    },
+    {
+      "epoch": 0.00023,
+      "grad_norm": 0.8982860560109955,
+      "learning_rate": 6.9e-05,
+      "loss": 10.5935,
+      "step": 23
+    },
+    {
+      "epoch": 0.00024,
+      "grad_norm": 0.8941575476865218,
+      "learning_rate": 7.2e-05,
+      "loss": 10.5802,
+      "step": 24
+    },
+    {
+      "epoch": 0.00025,
+      "grad_norm": 0.8955811690787305,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 10.5652,
+      "step": 25
+    },
+    {
+      "epoch": 0.00026,
+      "grad_norm": 0.9005417636666915,
+      "learning_rate": 7.8e-05,
+      "loss": 10.5507,
+      "step": 26
+    },
+    {
+      "epoch": 0.00027,
+      "grad_norm": 0.8929394290279239,
+      "learning_rate": 8.1e-05,
+      "loss": 10.5379,
+      "step": 27
+    },
+    {
+      "epoch": 0.00028,
+      "grad_norm": 0.892893092343029,
+      "learning_rate": 8.400000000000001e-05,
+      "loss": 10.5234,
+      "step": 28
+    },
+    {
+      "epoch": 0.00029,
+      "grad_norm": 0.8951984198637418,
+      "learning_rate": 8.7e-05,
+      "loss": 10.5078,
+      "step": 29
+    },
+    {
+      "epoch": 0.0003,
+      "grad_norm": 0.901037080772758,
+      "learning_rate": 8.999999999999999e-05,
+      "loss": 10.4913,
+      "step": 30
+    },
+    {
+      "epoch": 0.00031,
+      "grad_norm": 0.899757879512845,
+      "learning_rate": 9.3e-05,
+      "loss": 10.4759,
+      "step": 31
+    },
+    {
+      "epoch": 0.00032,
+      "grad_norm": 0.8963415000423225,
+      "learning_rate": 9.6e-05,
+      "loss": 10.4612,
+      "step": 32
+    },
+    {
+      "epoch": 0.00033,
+      "grad_norm": 0.8971468155029705,
+      "learning_rate": 9.900000000000001e-05,
+      "loss": 10.4429,
+      "step": 33
+    },
+    {
+      "epoch": 0.00034,
+      "grad_norm": 0.8964368558533553,
+      "learning_rate": 0.00010200000000000001,
+      "loss": 10.426,
+      "step": 34
+    },
+    {
+      "epoch": 0.00035,
+      "grad_norm": 0.9007340868108898,
+      "learning_rate": 0.00010500000000000002,
+      "loss": 10.4083,
+      "step": 35
+    },
+    {
+      "epoch": 0.00036,
+      "grad_norm": 0.8977475222905443,
+      "learning_rate": 0.000108,
+      "loss": 10.3895,
+      "step": 36
+    },
+    {
+      "epoch": 0.00037,
+      "grad_norm": 0.8929896912849768,
+      "learning_rate": 0.000111,
+      "loss": 10.3721,
+      "step": 37
+    },
+    {
+      "epoch": 0.00038,
+      "grad_norm": 0.8925204062685723,
+      "learning_rate": 0.000114,
+      "loss": 10.3515,
+      "step": 38
+    },
+    {
+      "epoch": 0.00039,
+      "grad_norm": 0.8947925451707294,
+      "learning_rate": 0.000117,
+      "loss": 10.3314,
+      "step": 39
+    },
+    {
+      "epoch": 0.0004,
+      "grad_norm": 0.8990961452390619,
+      "learning_rate": 0.00012,
+      "loss": 10.3088,
+      "step": 40
+    },
+    {
+      "epoch": 0.00041,
+      "grad_norm": 0.8951984584897338,
+      "learning_rate": 0.000123,
+      "loss": 10.2891,
+      "step": 41
+    },
+    {
+      "epoch": 0.00042,
+      "grad_norm": 0.8942493459254965,
+      "learning_rate": 0.000126,
+      "loss": 10.2679,
+      "step": 42
+    },
+    {
+      "epoch": 0.00043,
+      "grad_norm": 0.8983684110980745,
+      "learning_rate": 0.000129,
+      "loss": 10.243,
+      "step": 43
+    },
+    {
+      "epoch": 0.00044,
+      "grad_norm": 0.8985869855625094,
+      "learning_rate": 0.00013199999999999998,
+      "loss": 10.2206,
+      "step": 44
+    },
+    {
+      "epoch": 0.00045,
+      "grad_norm": 0.899962207774676,
+      "learning_rate": 0.000135,
+      "loss": 10.1962,
+      "step": 45
+    },
+    {
+      "epoch": 0.00046,
+      "grad_norm": 0.8914153211826606,
+      "learning_rate": 0.000138,
+      "loss": 10.1735,
+      "step": 46
+    },
+    {
+      "epoch": 0.00047,
+      "grad_norm": 0.8980761529388176,
+      "learning_rate": 0.000141,
+      "loss": 10.146,
+      "step": 47
+    },
+    {
+      "epoch": 0.00048,
+      "grad_norm": 0.9007540011988344,
+      "learning_rate": 0.000144,
+      "loss": 10.1222,
+      "step": 48
+    },
+    {
+      "epoch": 0.00049,
+      "grad_norm": 0.896344611975273,
+      "learning_rate": 0.000147,
+      "loss": 10.0974,
+      "step": 49
+    },
+    {
+      "epoch": 0.0005,
+      "grad_norm": 0.9015948414141589,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 10.0706,
+      "step": 50
+    },
+    {
+      "epoch": 0.00051,
+      "grad_norm": 0.9016102183936469,
+      "learning_rate": 0.000153,
+      "loss": 10.0433,
+      "step": 51
+    },
+    {
+      "epoch": 0.00052,
+      "grad_norm": 0.8993585803645682,
+      "learning_rate": 0.000156,
+      "loss": 10.0158,
+      "step": 52
+    },
+    {
+      "epoch": 0.00053,
+      "grad_norm": 0.8901736410301992,
+      "learning_rate": 0.000159,
+      "loss": 9.9917,
+      "step": 53
+    },
+    {
+      "epoch": 0.00054,
+      "grad_norm": 0.9083328832659304,
+      "learning_rate": 0.000162,
+      "loss": 9.9578,
+      "step": 54
+    },
+    {
+      "epoch": 0.00055,
+      "grad_norm": 0.8975570632788401,
+      "learning_rate": 0.000165,
+      "loss": 9.9337,
+      "step": 55
+    },
+    {
+      "epoch": 0.00056,
+      "grad_norm": 0.8924582131156604,
+      "learning_rate": 0.00016800000000000002,
+      "loss": 9.904,
+      "step": 56
+    },
+    {
+      "epoch": 0.00057,
+      "grad_norm": 0.8937117244788118,
+      "learning_rate": 0.000171,
+      "loss": 9.8765,
+      "step": 57
+    },
+    {
+      "epoch": 0.00058,
+      "grad_norm": 0.8912667557674427,
+      "learning_rate": 0.000174,
+      "loss": 9.8514,
+      "step": 58
+    },
+    {
+      "epoch": 0.00059,
+      "grad_norm": 0.899445317105929,
+      "learning_rate": 0.000177,
+      "loss": 9.819,
+      "step": 59
+    },
+    {
+      "epoch": 0.0006,
+      "grad_norm": 0.8827247644946434,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 9.7927,
+      "step": 60
+    },
+    {
+      "epoch": 0.00061,
+      "grad_norm": 0.8856006114384688,
+      "learning_rate": 0.000183,
+      "loss": 9.7645,
+      "step": 61
+    },
+    {
+      "epoch": 0.00062,
+      "grad_norm": 0.8887861476206323,
+      "learning_rate": 0.000186,
+      "loss": 9.7329,
+      "step": 62
+    },
+    {
+      "epoch": 0.00063,
+      "grad_norm": 0.8870948257177573,
+      "learning_rate": 0.000189,
+      "loss": 9.7044,
+      "step": 63
+    },
+    {
+      "epoch": 0.00064,
+      "grad_norm": 0.8854465518366905,
+      "learning_rate": 0.000192,
+      "loss": 9.6741,
+      "step": 64
+    },
+    {
+      "epoch": 0.00065,
+      "grad_norm": 0.8913964558604793,
+      "learning_rate": 0.00019500000000000002,
+      "loss": 9.6417,
+      "step": 65
+    },
+    {
+      "epoch": 0.00066,
+      "grad_norm": 0.8860459843940278,
+      "learning_rate": 0.00019800000000000002,
+      "loss": 9.6181,
+      "step": 66
+    },
+    {
+      "epoch": 0.00067,
+      "grad_norm": 0.8855602986146246,
+      "learning_rate": 0.000201,
+      "loss": 9.5886,
+      "step": 67
+    },
+    {
+      "epoch": 0.00068,
+      "grad_norm": 0.89182613307124,
+      "learning_rate": 0.00020400000000000003,
+      "loss": 9.5554,
+      "step": 68
+    },
+    {
+      "epoch": 0.00069,
+      "grad_norm": 0.8788147221752699,
+      "learning_rate": 0.00020700000000000002,
+      "loss": 9.5276,
+      "step": 69
+    },
+    {
+      "epoch": 0.0007,
+      "grad_norm": 0.89013480384513,
+      "learning_rate": 0.00021000000000000004,
+      "loss": 9.4911,
+      "step": 70
+    },
+    {
+      "epoch": 0.00071,
+      "grad_norm": 0.8832608390392153,
+      "learning_rate": 0.00021299999999999997,
+      "loss": 9.4637,
+      "step": 71
+    },
+    {
+      "epoch": 0.00072,
+      "grad_norm": 0.8850551460752941,
+      "learning_rate": 0.000216,
+      "loss": 9.4371,
+      "step": 72
+    },
+    {
+      "epoch": 0.00073,
+      "grad_norm": 0.8854430272945265,
+      "learning_rate": 0.00021899999999999998,
+      "loss": 9.4029,
+      "step": 73
+    },
+    {
+      "epoch": 0.00074,
+      "grad_norm": 0.881388508551569,
+      "learning_rate": 0.000222,
+      "loss": 9.3766,
+      "step": 74
+    },
+    {
+      "epoch": 0.00075,
+      "grad_norm": 0.8804734413831777,
+      "learning_rate": 0.000225,
+      "loss": 9.3413,
+      "step": 75
+    },
+    {
+      "epoch": 0.00076,
+      "grad_norm": 0.891470306594604,
+      "learning_rate": 0.000228,
+      "loss": 9.3136,
+      "step": 76
+    },
+    {
+      "epoch": 0.00077,
+      "grad_norm": 0.8835806398314079,
+      "learning_rate": 0.000231,
+      "loss": 9.2843,
+      "step": 77
+    },
+    {
+      "epoch": 0.00078,
+      "grad_norm": 0.8778470434340718,
+      "learning_rate": 0.000234,
+      "loss": 9.2534,
+      "step": 78
+    },
+    {
+      "epoch": 0.00079,
+      "grad_norm": 0.8849142747864956,
+      "learning_rate": 0.00023700000000000001,
+      "loss": 9.2174,
+      "step": 79
+    },
+    {
+      "epoch": 0.0008,
+      "grad_norm": 0.8831859916332898,
+      "learning_rate": 0.00024,
+      "loss": 9.1835,
+      "step": 80
+    },
+    {
+      "epoch": 0.00081,
+      "grad_norm": 0.8883671155155716,
+      "learning_rate": 0.00024300000000000002,
+      "loss": 9.1516,
+      "step": 81
+    },
+    {
+      "epoch": 0.00082,
+      "grad_norm": 0.8919005998406658,
+      "learning_rate": 0.000246,
+      "loss": 9.1278,
+      "step": 82
+    },
+    {
+      "epoch": 0.00083,
+      "grad_norm": 0.8904109612127737,
+      "learning_rate": 0.00024900000000000004,
+      "loss": 9.0926,
+      "step": 83
+    },
+    {
+      "epoch": 0.00084,
+      "grad_norm": 0.8922919840670517,
+      "learning_rate": 0.000252,
+      "loss": 9.0627,
+      "step": 84
+    },
+    {
+      "epoch": 0.00085,
+      "grad_norm": 0.8851770394662277,
+      "learning_rate": 0.000255,
+      "loss": 9.0389,
+      "step": 85
+    },
+    {
+      "epoch": 0.00086,
+      "grad_norm": 0.892287423580011,
+      "learning_rate": 0.000258,
+      "loss": 9.0029,
+      "step": 86
+    },
+    {
+      "epoch": 0.00087,
+      "grad_norm": 0.8819038715566697,
+      "learning_rate": 0.000261,
+      "loss": 8.9819,
+      "step": 87
+    },
+    {
+      "epoch": 0.00088,
+      "grad_norm": 0.8825670273484024,
+      "learning_rate": 0.00026399999999999997,
+      "loss": 8.9452,
+      "step": 88
+    },
+    {
+      "epoch": 0.00089,
+      "grad_norm": 0.87734677010751,
+      "learning_rate": 0.000267,
+      "loss": 8.92,
+      "step": 89
+    },
+    {
+      "epoch": 0.0009,
+      "grad_norm": 0.8800982265590005,
+      "learning_rate": 0.00027,
+      "loss": 8.8935,
+      "step": 90
+    },
+    {
+      "epoch": 0.00091,
+      "grad_norm": 0.8751738333241339,
+      "learning_rate": 0.000273,
+      "loss": 8.8635,
+      "step": 91
+    },
+    {
+      "epoch": 0.00092,
+      "grad_norm": 0.8805112062017327,
+      "learning_rate": 0.000276,
+      "loss": 8.8313,
+      "step": 92
+    },
+    {
+      "epoch": 0.00093,
+      "grad_norm": 0.8782110873043902,
+      "learning_rate": 0.000279,
+      "loss": 8.8018,
+      "step": 93
+    },
+    {
+      "epoch": 0.00094,
+      "grad_norm": 0.8793675998346636,
+      "learning_rate": 0.000282,
+      "loss": 8.7769,
+      "step": 94
+    },
+    {
+      "epoch": 0.00095,
+      "grad_norm": 0.8739437022686155,
+      "learning_rate": 0.000285,
+      "loss": 8.7468,
+      "step": 95
+    },
+    {
+      "epoch": 0.00096,
+      "grad_norm": 0.8792093745975903,
+      "learning_rate": 0.000288,
+      "loss": 8.7137,
+      "step": 96
+    },
+    {
+      "epoch": 0.00097,
+      "grad_norm": 0.8733827683228169,
+      "learning_rate": 0.000291,
+      "loss": 8.6904,
+      "step": 97
+    },
+    {
+      "epoch": 0.00098,
+      "grad_norm": 0.8720585520116785,
+      "learning_rate": 0.000294,
+      "loss": 8.6531,
+      "step": 98
+    },
+    {
+      "epoch": 0.00099,
+      "grad_norm": 0.8706078195741852,
+      "learning_rate": 0.000297,
+      "loss": 8.6333,
+      "step": 99
+    },
+    {
+      "epoch": 0.001,
+      "grad_norm": 0.868963072522692,
+      "learning_rate": 0.00030000000000000003,
+      "loss": 8.6078,
+      "step": 100
+    },
+    {
+      "epoch": 0.00101,
+      "grad_norm": 0.8733428125714966,
+      "learning_rate": 0.00030300000000000005,
+      "loss": 8.5742,
+      "step": 101
+    },
+    {
+      "epoch": 0.00102,
+      "grad_norm": 0.8594222895452354,
+      "learning_rate": 0.000306,
+      "loss": 8.5555,
+      "step": 102
+    },
+    {
+      "epoch": 0.00103,
+      "grad_norm": 0.8663526773106932,
+      "learning_rate": 0.000309,
+      "loss": 8.5314,
+      "step": 103
+    },
+    {
+      "epoch": 0.00104,
+      "grad_norm": 0.8588890352637412,
+      "learning_rate": 0.000312,
+      "loss": 8.5113,
+      "step": 104
+    },
+    {
+      "epoch": 0.00105,
+      "grad_norm": 0.862875507176608,
+      "learning_rate": 0.000315,
+      "loss": 8.4848,
+      "step": 105
+    },
+    {
+      "epoch": 0.00106,
+      "grad_norm": 0.858707935395144,
+      "learning_rate": 0.000318,
+      "loss": 8.4562,
+      "step": 106
+    },
+    {
+      "epoch": 0.00107,
+      "grad_norm": 0.8477892944676333,
+      "learning_rate": 0.000321,
+      "loss": 8.4313,
+      "step": 107
+    },
+    {
+      "epoch": 0.00108,
+      "grad_norm": 0.8523289020593828,
+      "learning_rate": 0.000324,
+      "loss": 8.4028,
+      "step": 108
+    },
+    {
+      "epoch": 0.00109,
+      "grad_norm": 0.847730490341604,
+      "learning_rate": 0.000327,
+      "loss": 8.3837,
+      "step": 109
+    },
+    {
+      "epoch": 0.0011,
+      "grad_norm": 0.8528691839316361,
+      "learning_rate": 0.00033,
+      "loss": 8.3605,
+      "step": 110
+    },
+    {
+      "epoch": 0.00111,
+      "grad_norm": 0.8490568938398722,
+      "learning_rate": 0.000333,
+      "loss": 8.3302,
+      "step": 111
+    },
+    {
+      "epoch": 0.00112,
+      "grad_norm": 0.8687159568709311,
+      "learning_rate": 0.00033600000000000004,
+      "loss": 8.3067,
+      "step": 112
+    },
+    {
+      "epoch": 0.00113,
+      "grad_norm": 0.9514329048402193,
+      "learning_rate": 0.000339,
+      "loss": 8.2682,
+      "step": 113
+    },
+    {
+      "epoch": 0.00114,
+      "grad_norm": 1.0232734531890686,
+      "learning_rate": 0.000342,
+      "loss": 8.2639,
+      "step": 114
+    },
+    {
+      "epoch": 0.00115,
+      "grad_norm": 0.88782361437861,
+      "learning_rate": 0.00034500000000000004,
+      "loss": 8.2339,
+      "step": 115
+    },
+    {
+      "epoch": 0.00116,
+      "grad_norm": 0.8477580889036649,
+      "learning_rate": 0.000348,
+      "loss": 8.2115,
+      "step": 116
+    },
+    {
+      "epoch": 0.00117,
+      "grad_norm": 0.9025353672223668,
+      "learning_rate": 0.000351,
+      "loss": 8.182,
+      "step": 117
+    },
+    {
+      "epoch": 0.00118,
+      "grad_norm": 0.8194804043714383,
+      "learning_rate": 0.000354,
+      "loss": 8.1632,
+      "step": 118
+    },
+    {
+      "epoch": 0.00119,
+      "grad_norm": 0.8747187640572928,
+      "learning_rate": 0.000357,
+      "loss": 8.1526,
+      "step": 119
+    },
+    {
+      "epoch": 0.0012,
+      "grad_norm": 0.819302051650556,
+      "learning_rate": 0.00035999999999999997,
+      "loss": 8.125,
+      "step": 120
+    },
+    {
+      "epoch": 0.00121,
+      "grad_norm": 0.8357670289489353,
+      "learning_rate": 0.000363,
+      "loss": 8.0977,
+      "step": 121
+    },
+    {
+      "epoch": 0.00122,
+      "grad_norm": 0.8269622916799205,
+      "learning_rate": 0.000366,
+      "loss": 8.0806,
+      "step": 122
+    },
+    {
+      "epoch": 0.00123,
+      "grad_norm": 0.7923909888868983,
+      "learning_rate": 0.000369,
+      "loss": 8.0558,
+      "step": 123
+    },
+    {
+      "epoch": 0.00124,
+      "grad_norm": 0.8405159164509748,
+      "learning_rate": 0.000372,
+      "loss": 8.0354,
+      "step": 124
+    },
+    {
+      "epoch": 0.00125,
+      "grad_norm": 0.7957014508382675,
+      "learning_rate": 0.000375,
+      "loss": 8.0077,
+      "step": 125
+    },
+    {
+      "epoch": 0.00126,
+      "grad_norm": 0.8032762193906265,
+      "learning_rate": 0.000378,
+      "loss": 7.9954,
+      "step": 126
+    },
+    {
+      "epoch": 0.00127,
+      "grad_norm": 0.8534337354315669,
+      "learning_rate": 0.000381,
+      "loss": 7.9712,
+      "step": 127
+    },
+    {
+      "epoch": 0.00128,
+      "grad_norm": 0.7889902179927859,
+      "learning_rate": 0.000384,
+      "loss": 7.9489,
+      "step": 128
+    },
+    {
+      "epoch": 0.00129,
+      "grad_norm": 0.7667804338310586,
+      "learning_rate": 0.00038700000000000003,
+      "loss": 7.936,
+      "step": 129
+    },
+    {
+      "epoch": 0.0013,
+      "grad_norm": 0.9090077886672961,
+      "learning_rate": 0.00039000000000000005,
+      "loss": 7.9113,
+      "step": 130
+    },
+    {
+      "epoch": 0.00131,
+      "grad_norm": 1.1017726636538738,
+      "learning_rate": 0.000393,
+      "loss": 7.8813,
+      "step": 131
+    },
+    {
+      "epoch": 0.00132,
+      "grad_norm": 0.9363739073599702,
+      "learning_rate": 0.00039600000000000003,
+      "loss": 7.8715,
+      "step": 132
+    },
+    {
+      "epoch": 0.00133,
+      "grad_norm": 0.7265974712286192,
+      "learning_rate": 0.00039900000000000005,
+      "loss": 7.8436,
+      "step": 133
+    },
+    {
+      "epoch": 0.00134,
+      "grad_norm": 1.085264808454749,
+      "learning_rate": 0.000402,
+      "loss": 7.8358,
+      "step": 134
+    },
+    {
+      "epoch": 0.00135,
+      "grad_norm": 1.1024504993278263,
+      "learning_rate": 0.00040500000000000003,
+      "loss": 7.8094,
+      "step": 135
+    },
+    {
+      "epoch": 0.00136,
+      "grad_norm": 0.7406778288782102,
+      "learning_rate": 0.00040800000000000005,
+      "loss": 7.7862,
+      "step": 136
+    },
+    {
+      "epoch": 0.00137,
+      "grad_norm": 0.948337541042023,
+      "learning_rate": 0.000411,
+      "loss": 7.7688,
+      "step": 137
+    },
+    {
+      "epoch": 0.00138,
+      "grad_norm": 0.9084991981726261,
+      "learning_rate": 0.00041400000000000003,
+      "loss": 7.7417,
+      "step": 138
+    },
+    {
+      "epoch": 0.00139,
+      "grad_norm": 0.7074494223870315,
+      "learning_rate": 0.00041700000000000005,
+      "loss": 7.7227,
+      "step": 139
+    },
+    {
+      "epoch": 0.0014,
+      "grad_norm": 0.7974190203181878,
+      "learning_rate": 0.00042000000000000007,
+      "loss": 7.7086,
+      "step": 140
+    },
+    {
+      "epoch": 0.00141,
+      "grad_norm": 0.6861992297830735,
+      "learning_rate": 0.000423,
+      "loss": 7.6913,
+      "step": 141
+    },
+    {
+      "epoch": 0.00142,
+      "grad_norm": 0.7349980301116962,
+      "learning_rate": 0.00042599999999999995,
+      "loss": 7.677,
+      "step": 142
+    },
+    {
+      "epoch": 0.00143,
+      "grad_norm": 0.7684994731116016,
+      "learning_rate": 0.00042899999999999997,
+      "loss": 7.6484,
+      "step": 143
+    },
+    {
+      "epoch": 0.00144,
+      "grad_norm": 0.6719380878952201,
+      "learning_rate": 0.000432,
+      "loss": 7.629,
+      "step": 144
+    },
+    {
+      "epoch": 0.00145,
+      "grad_norm": 0.7001362890446781,
+      "learning_rate": 0.000435,
+      "loss": 7.6232,
+      "step": 145
+    },
+    {
+      "epoch": 0.00146,
+      "grad_norm": 0.7384188504614574,
+      "learning_rate": 0.00043799999999999997,
+      "loss": 7.5849,
+      "step": 146
+    },
+    {
+      "epoch": 0.00147,
+      "grad_norm": 0.6536906507985312,
+      "learning_rate": 0.000441,
+      "loss": 7.5755,
+      "step": 147
+    },
+    {
+      "epoch": 0.00148,
+      "grad_norm": 0.6570387947682329,
+      "learning_rate": 0.000444,
+      "loss": 7.5632,
+      "step": 148
+    },
+    {
+      "epoch": 0.00149,
+      "grad_norm": 0.7127206041028632,
+      "learning_rate": 0.00044699999999999997,
+      "loss": 7.5571,
+      "step": 149
+    },
+    {
+      "epoch": 0.0015,
+      "grad_norm": 0.7308289900379658,
+      "learning_rate": 0.00045,
+      "loss": 7.5253,
+      "step": 150
+    },
+    {
+      "epoch": 0.00151,
+      "grad_norm": 0.8664894863114535,
+      "learning_rate": 0.000453,
+      "loss": 7.505,
+      "step": 151
+    },
+    {
+      "epoch": 0.00152,
+      "grad_norm": 0.8895841422117233,
+      "learning_rate": 0.000456,
+      "loss": 7.5034,
+      "step": 152
+    },
+    {
+      "epoch": 0.00153,
+      "grad_norm": 0.662621907628313,
+      "learning_rate": 0.000459,
+      "loss": 7.4855,
+      "step": 153
+    },
+    {
+      "epoch": 0.00154,
+      "grad_norm": 0.7176449004695503,
+      "learning_rate": 0.000462,
+      "loss": 7.4655,
+      "step": 154
+    },
+    {
+      "epoch": 0.00155,
+      "grad_norm": 0.7407325792933065,
+      "learning_rate": 0.000465,
+      "loss": 7.4508,
+      "step": 155
+    },
+    {
+      "epoch": 0.00156,
+      "grad_norm": 0.7037717820949844,
+      "learning_rate": 0.000468,
+      "loss": 7.4265,
+      "step": 156
+    },
+    {
+      "epoch": 0.00157,
+      "grad_norm": 0.5688508632010355,
+      "learning_rate": 0.000471,
+      "loss": 7.4159,
+      "step": 157
+    },
+    {
+      "epoch": 0.00158,
+      "grad_norm": 0.812094320370736,
+      "learning_rate": 0.00047400000000000003,
+      "loss": 7.4015,
+      "step": 158
+    },
+    {
+      "epoch": 0.00159,
+      "grad_norm": 0.6851465337071334,
+      "learning_rate": 0.000477,
+      "loss": 7.3879,
+      "step": 159
+    },
+    {
+      "epoch": 0.0016,
+      "grad_norm": 0.9553203809874583,
+      "learning_rate": 0.00048,
+      "loss": 7.3739,
+      "step": 160
+    },
+    {
+      "epoch": 0.00161,
+      "grad_norm": 0.7328141993841643,
+      "learning_rate": 0.00048300000000000003,
+      "loss": 7.3497,
+      "step": 161
+    },
+    {
+      "epoch": 0.00162,
+      "grad_norm": 0.6781537584586433,
+      "learning_rate": 0.00048600000000000005,
+      "loss": 7.3535,
+      "step": 162
+    },
+    {
+      "epoch": 0.00163,
+      "grad_norm": 0.5807738383940755,
+      "learning_rate": 0.0004890000000000001,
+      "loss": 7.3306,
+      "step": 163
+    },
+    {
+      "epoch": 0.00164,
+      "grad_norm": 0.6466438270814104,
+      "learning_rate": 0.000492,
+      "loss": 7.3371,
+      "step": 164
+    },
+    {
+      "epoch": 0.00165,
+      "grad_norm": 0.4365334647721832,
+      "learning_rate": 0.000495,
+      "loss": 7.3038,
+      "step": 165
+    },
+    {
+      "epoch": 0.00166,
+      "grad_norm": 0.7387692677149859,
+      "learning_rate": 0.0004980000000000001,
+      "loss": 7.2849,
+      "step": 166
+    },
+    {
+      "epoch": 0.00167,
+      "grad_norm": 0.6489238814400348,
+      "learning_rate": 0.000501,
+      "loss": 7.2765,
+      "step": 167
+    },
+    {
+      "epoch": 0.00168,
+      "grad_norm": 0.4943076416069544,
+      "learning_rate": 0.000504,
+      "loss": 7.2512,
+      "step": 168
+    },
+    {
+      "epoch": 0.00169,
+      "grad_norm": 0.6623047646201438,
+      "learning_rate": 0.0005070000000000001,
+      "loss": 7.2485,
+      "step": 169
+    },
+    {
+      "epoch": 0.0017,
+      "grad_norm": 0.5841690806336316,
+      "learning_rate": 0.00051,
+      "loss": 7.2331,
+      "step": 170
+    },
+    {
+      "epoch": 0.00171,
+      "grad_norm": 0.455313434659774,
+      "learning_rate": 0.000513,
+      "loss": 7.2132,
+      "step": 171
+    },
+    {
+      "epoch": 0.00172,
+      "grad_norm": 0.6012704724036779,
+      "learning_rate": 0.000516,
+      "loss": 7.214,
+      "step": 172
+    },
+    {
+      "epoch": 0.00173,
+      "grad_norm": 0.5433283029678282,
+      "learning_rate": 0.0005189999999999999,
+      "loss": 7.1993,
+      "step": 173
+    },
+    {
+      "epoch": 0.00174,
+      "grad_norm": 0.38251135313099477,
+      "learning_rate": 0.000522,
+      "loss": 7.2121,
+      "step": 174
+    },
+    {
+      "epoch": 0.00175,
+      "grad_norm": 0.6483519243845758,
+      "learning_rate": 0.000525,
+      "loss": 7.1796,
+      "step": 175
+    },
+    {
+      "epoch": 0.00176,
+      "grad_norm": 0.5106681215905619,
+      "learning_rate": 0.0005279999999999999,
+      "loss": 7.1531,
+      "step": 176
+    },
+    {
+      "epoch": 0.00177,
+      "grad_norm": 0.3867365673342131,
+      "learning_rate": 0.000531,
+      "loss": 7.1566,
+      "step": 177
+    },
+    {
+      "epoch": 0.00178,
+      "grad_norm": 0.6803005272577738,
+      "learning_rate": 0.000534,
+      "loss": 7.1525,
+      "step": 178
+    },
+    {
+      "epoch": 0.00179,
+      "grad_norm": 0.44918531151657043,
+      "learning_rate": 0.000537,
+      "loss": 7.1407,
+      "step": 179
+    },
+    {
+      "epoch": 0.0018,
+      "grad_norm": 0.39944612038572996,
+      "learning_rate": 0.00054,
+      "loss": 7.1121,
+      "step": 180
+    },
+    {
+      "epoch": 0.00181,
+      "grad_norm": 0.5937704312371144,
+      "learning_rate": 0.000543,
+      "loss": 7.1037,
+      "step": 181
+    },
+    {
+      "epoch": 0.00182,
+      "grad_norm": 0.3956001398507633,
+      "learning_rate": 0.000546,
+      "loss": 7.0976,
+      "step": 182
+    },
+    {
+      "epoch": 0.00183,
+      "grad_norm": 0.36202282333499824,
+      "learning_rate": 0.000549,
+      "loss": 7.0864,
+      "step": 183
+    },
+    {
+      "epoch": 0.00184,
+      "grad_norm": 0.564760920006622,
+      "learning_rate": 0.000552,
+      "loss": 7.0785,
+      "step": 184
+    },
+    {
+      "epoch": 0.00185,
+      "grad_norm": 0.36333201524042624,
+      "learning_rate": 0.000555,
+      "loss": 7.0515,
+      "step": 185
+    },
+    {
+      "epoch": 0.00186,
+      "grad_norm": 0.3517854453126823,
+      "learning_rate": 0.000558,
+      "loss": 7.0614,
+      "step": 186
+    },
+    {
+      "epoch": 0.00187,
+      "grad_norm": 0.5413049623390372,
+      "learning_rate": 0.000561,
+      "loss": 7.0394,
+      "step": 187
+    },
+    {
+      "epoch": 0.00188,
+      "grad_norm": 0.4452202297805183,
+      "learning_rate": 0.000564,
+      "loss": 7.0168,
+      "step": 188
+    },
+    {
+      "epoch": 0.00189,
+      "grad_norm": 0.2888404630990515,
+      "learning_rate": 0.000567,
+      "loss": 7.0108,
+      "step": 189
+    },
+    {
+      "epoch": 0.0019,
+      "grad_norm": 0.5069984229642092,
+      "learning_rate": 0.00057,
+      "loss": 6.9967,
+      "step": 190
+    },
+    {
+      "epoch": 0.00191,
+      "grad_norm": 0.5049159939798347,
+      "learning_rate": 0.000573,
+      "loss": 6.9897,
+      "step": 191
+    },
+    {
+      "epoch": 0.00192,
+      "grad_norm": 0.353459237780404,
+      "learning_rate": 0.000576,
+      "loss": 6.9784,
+      "step": 192
+    },
+    {
+      "epoch": 0.00193,
+      "grad_norm": 0.4583730098712965,
+      "learning_rate": 0.000579,
+      "loss": 6.9798,
+      "step": 193
+    },
+    {
+      "epoch": 0.00194,
+      "grad_norm": 0.5934016188318005,
+      "learning_rate": 0.000582,
+      "loss": 6.9757,
+      "step": 194
+    },
+    {
+      "epoch": 0.00195,
+      "grad_norm": 0.6611510755360627,
+      "learning_rate": 0.000585,
+      "loss": 6.9556,
+      "step": 195
+    },
+    {
+      "epoch": 0.00196,
+      "grad_norm": 0.6794801564658052,
+      "learning_rate": 0.000588,
+      "loss": 6.9455,
+      "step": 196
+    },
+    {
+      "epoch": 0.00197,
+      "grad_norm": 0.6965025324131076,
+      "learning_rate": 0.000591,
+      "loss": 6.9424,
+      "step": 197
+    },
+    {
+      "epoch": 0.00198,
+      "grad_norm": 1.0010424094015926,
+      "learning_rate": 0.000594,
+      "loss": 6.9428,
+      "step": 198
+    },
+    {
+      "epoch": 0.00199,
+      "grad_norm": 0.8473959375492113,
+      "learning_rate": 0.0005970000000000001,
+      "loss": 6.9257,
+      "step": 199
+    },
+    {
+      "epoch": 0.002,
+      "grad_norm": 0.4674164352666973,
+      "learning_rate": 0.0006000000000000001,
+      "loss": 6.9135,
+      "step": 200
+    },
+    {
+      "epoch": 0.00201,
+      "grad_norm": 0.6130101305251902,
+      "learning_rate": 0.000603,
+      "loss": 6.8907,
+      "step": 201
+    },
+    {
+      "epoch": 0.00202,
+      "grad_norm": 0.5766204915712545,
+      "learning_rate": 0.0006060000000000001,
+      "loss": 6.8786,
+      "step": 202
+    },
+    {
+      "epoch": 0.00203,
+      "grad_norm": 0.42726729176690026,
+      "learning_rate": 0.0006090000000000001,
+      "loss": 6.8819,
+      "step": 203
+    },
+    {
+      "epoch": 0.00204,
+      "grad_norm": 0.5161441207128264,
+      "learning_rate": 0.000612,
+      "loss": 6.8675,
+      "step": 204
+    },
+    {
+      "epoch": 0.00205,
+      "grad_norm": 0.44005998133884483,
+      "learning_rate": 0.000615,
+      "loss": 6.852,
+      "step": 205
+    },
+    {
+      "epoch": 0.00206,
+      "grad_norm": 0.4214710856814876,
+      "learning_rate": 0.000618,
+      "loss": 6.8455,
+      "step": 206
+    },
+    {
+      "epoch": 0.00207,
+      "grad_norm": 0.3835330802523119,
+      "learning_rate": 0.000621,
+      "loss": 6.8358,
+      "step": 207
+    },
+    {
+      "epoch": 0.00208,
+      "grad_norm": 0.4118282878888594,
+      "learning_rate": 0.000624,
+      "loss": 6.8325,
+      "step": 208
+    },
+    {
+      "epoch": 0.00209,
+      "grad_norm": 0.35207765117623135,
+      "learning_rate": 0.000627,
+      "loss": 6.8108,
+      "step": 209
+    },
+    {
+      "epoch": 0.0021,
+      "grad_norm": 0.35165038308098223,
+      "learning_rate": 0.00063,
+      "loss": 6.8001,
+      "step": 210
+    },
+    {
+      "epoch": 0.00211,
+      "grad_norm": 0.2907998960376576,
+      "learning_rate": 0.000633,
+      "loss": 6.794,
+      "step": 211
+    },
+    {
+      "epoch": 0.00212,
+      "grad_norm": 0.34183459291083,
+      "learning_rate": 0.000636,
+      "loss": 6.7864,
+      "step": 212
+    },
+    {
+      "epoch": 0.00213,
+      "grad_norm": 0.31625023606106545,
+      "learning_rate": 0.000639,
+      "loss": 6.7664,
+      "step": 213
+    },
+    {
+      "epoch": 0.00214,
+      "grad_norm": 0.3389167462098675,
+      "learning_rate": 0.000642,
+      "loss": 6.7645,
+      "step": 214
+    },
+    {
+      "epoch": 0.00215,
+      "grad_norm": 0.4689783367804683,
+      "learning_rate": 0.000645,
+      "loss": 6.7617,
+      "step": 215
+    },
+    {
+      "epoch": 0.00216,
+      "grad_norm": 0.7554849005001304,
+      "learning_rate": 0.000648,
+      "loss": 6.763,
+      "step": 216
+    },
+    {
+      "epoch": 0.00217,
+      "grad_norm": 1.3852957930548755,
+      "learning_rate": 0.000651,
+      "loss": 6.7541,
+      "step": 217
+    },
+    {
+      "epoch": 0.00218,
+      "grad_norm": 0.6607509792225358,
+      "learning_rate": 0.000654,
+      "loss": 6.7405,
+      "step": 218
+    },
+    {
+      "epoch": 0.00219,
+      "grad_norm": 0.6480761266032634,
+      "learning_rate": 0.000657,
+      "loss": 6.7122,
+      "step": 219
+    },
+    {
+      "epoch": 0.0022,
+      "grad_norm": 1.2227203355321279,
+      "learning_rate": 0.00066,
+      "loss": 6.7471,
+      "step": 220
+    },
+    {
+      "epoch": 0.00221,
+      "grad_norm": 1.1390371544106608,
+      "learning_rate": 0.0006630000000000001,
+      "loss": 6.7248,
+      "step": 221
+    },
+    {
+      "epoch": 0.00222,
+      "grad_norm": 0.7354052143767025,
+      "learning_rate": 0.000666,
+      "loss": 6.7013,
+      "step": 222
+    },
+    {
+      "epoch": 0.00223,
+      "grad_norm": 0.6134943537271429,
+      "learning_rate": 0.000669,
+      "loss": 6.6937,
+      "step": 223
+    },
+    {
+      "epoch": 0.00224,
+      "grad_norm": 0.5502679770385809,
+      "learning_rate": 0.0006720000000000001,
+      "loss": 6.6934,
+      "step": 224
+    },
+    {
+      "epoch": 0.00225,
+      "grad_norm": 0.47738132510350817,
+      "learning_rate": 0.000675,
+      "loss": 6.6778,
+      "step": 225
+    },
+    {
+      "epoch": 0.00226,
+      "grad_norm": 0.507114065421453,
+      "learning_rate": 0.000678,
+      "loss": 6.6712,
+      "step": 226
+    },
+    {
+      "epoch": 0.00227,
+      "grad_norm": 0.4403172481189424,
+      "learning_rate": 0.0006810000000000001,
+      "loss": 6.646,
+      "step": 227
+    },
+    {
+      "epoch": 0.00228,
+      "grad_norm": 0.34083753361811386,
+      "learning_rate": 0.000684,
+      "loss": 6.6465,
+      "step": 228
+    },
+    {
+      "epoch": 0.00229,
+      "grad_norm": 0.3502366193988861,
+      "learning_rate": 0.000687,
+      "loss": 6.6418,
+      "step": 229
+    },
+    {
+      "epoch": 0.0023,
+      "grad_norm": 0.40867670062411404,
+      "learning_rate": 0.0006900000000000001,
+      "loss": 6.6218,
+      "step": 230
+    },
+    {
+      "epoch": 0.00231,
+      "grad_norm": 0.3512396474179769,
+      "learning_rate": 0.000693,
+      "loss": 6.605,
+      "step": 231
+    },
+    {
+      "epoch": 0.00232,
+      "grad_norm": 0.37061772246720376,
+      "learning_rate": 0.000696,
+      "loss": 6.6164,
+      "step": 232
+    },
+    {
+      "epoch": 0.00233,
+      "grad_norm": 0.30975845835339183,
+      "learning_rate": 0.0006990000000000001,
+      "loss": 6.6026,
+      "step": 233
+    },
+    {
+      "epoch": 0.00234,
+      "grad_norm": 0.31056463949410484,
+      "learning_rate": 0.000702,
+      "loss": 6.5934,
+      "step": 234
+    },
+    {
+      "epoch": 0.00235,
+      "grad_norm": 0.3416829938754332,
+      "learning_rate": 0.000705,
+      "loss": 6.5798,
+      "step": 235
+    },
+    {
+      "epoch": 0.00236,
+      "grad_norm": 0.3580449551775802,
+      "learning_rate": 0.000708,
+      "loss": 6.5618,
+      "step": 236
+    },
+    {
+      "epoch": 0.00237,
+      "grad_norm": 0.31186166958910727,
+      "learning_rate": 0.0007109999999999999,
+      "loss": 6.5709,
+      "step": 237
+    },
+    {
+      "epoch": 0.00238,
+      "grad_norm": 0.35712039334009765,
+      "learning_rate": 0.000714,
+      "loss": 6.5601,
+      "step": 238
+    },
+    {
+      "epoch": 0.00239,
+      "grad_norm": 0.4689874670209286,
+      "learning_rate": 0.000717,
+      "loss": 6.5431,
+      "step": 239
+    },
+    {
+      "epoch": 0.0024,
+      "grad_norm": 0.8385556457838154,
+      "learning_rate": 0.0007199999999999999,
+      "loss": 6.5652,
+      "step": 240
+    },
+    {
+      "epoch": 0.00241,
+      "grad_norm": 1.2597764356570753,
+      "learning_rate": 0.000723,
+      "loss": 6.5721,
+      "step": 241
+    },
+    {
+      "epoch": 0.00242,
+      "grad_norm": 0.7018547016449656,
+      "learning_rate": 0.000726,
+      "loss": 6.5258,
+      "step": 242
+    },
+    {
+      "epoch": 0.00243,
+      "grad_norm": 0.9679840331331718,
+      "learning_rate": 0.000729,
+      "loss": 6.5235,
+      "step": 243
+    },
+    {
+      "epoch": 0.00244,
+      "grad_norm": 0.8151244219176279,
+      "learning_rate": 0.000732,
+      "loss": 6.5289,
+      "step": 244
+    },
+    {
+      "epoch": 0.00245,
+      "grad_norm": 0.7408239035858821,
+      "learning_rate": 0.000735,
+      "loss": 6.5159,
+      "step": 245
+    },
+    {
+      "epoch": 0.00246,
+      "grad_norm": 0.7269888029650857,
+      "learning_rate": 0.000738,
+      "loss": 6.5006,
+      "step": 246
+    },
+    {
+      "epoch": 0.00247,
+      "grad_norm": 0.6715573633422388,
+      "learning_rate": 0.000741,
+      "loss": 6.4916,
+      "step": 247
+    },
+    {
+      "epoch": 0.00248,
+      "grad_norm": 0.5355440416613977,
+      "learning_rate": 0.000744,
+      "loss": 6.4965,
+      "step": 248
+    },
+    {
+      "epoch": 0.00249,
+      "grad_norm": 0.47837466185087324,
+      "learning_rate": 0.000747,
+      "loss": 6.474,
+      "step": 249
+    },
+    {
+      "epoch": 0.0025,
+      "grad_norm": 0.5421674013984196,
+      "learning_rate": 0.00075,
+      "loss": 6.4781,
+      "step": 250
+    },
+    {
+      "epoch": 0.00251,
+      "grad_norm": 0.5046285559759315,
+      "learning_rate": 0.000753,
+      "loss": 6.4394,
+      "step": 251
+    },
+    {
+      "epoch": 0.00252,
+      "grad_norm": 0.4902399046559828,
+      "learning_rate": 0.000756,
+      "loss": 6.4478,
+      "step": 252
+    },
+    {
+      "epoch": 0.00253,
+      "grad_norm": 0.6026763597870729,
+      "learning_rate": 0.000759,
+      "loss": 6.4417,
+      "step": 253
+    },
+    {
+      "epoch": 0.00254,
+      "grad_norm": 0.6534629043130656,
+      "learning_rate": 0.000762,
+      "loss": 6.4357,
+      "step": 254
+    },
+    {
+      "epoch": 0.00255,
+      "grad_norm": 0.5879568865224861,
+      "learning_rate": 0.0007650000000000001,
+      "loss": 6.4331,
+      "step": 255
+    },
+    {
+      "epoch": 0.00256,
+      "grad_norm": 0.587549512889128,
+      "learning_rate": 0.000768,
+      "loss": 6.4121,
+      "step": 256
+    },
+    {
+      "epoch": 0.00257,
+      "grad_norm": 0.4948417519702818,
+      "learning_rate": 0.000771,
+      "loss": 6.4144,
+      "step": 257
+    },
+    {
+      "epoch": 0.00258,
+      "grad_norm": 0.500640645286767,
+      "learning_rate": 0.0007740000000000001,
+      "loss": 6.3915,
+      "step": 258
+    },
+    {
+      "epoch": 0.00259,
+      "grad_norm": 0.46096026590523803,
+      "learning_rate": 0.000777,
+      "loss": 6.3904,
+      "step": 259
+    },
+    {
+      "epoch": 0.0026,
+      "grad_norm": 0.36710644781296853,
+      "learning_rate": 0.0007800000000000001,
+      "loss": 6.3738,
+      "step": 260
+    },
+    {
+      "epoch": 0.00261,
+      "grad_norm": 0.3228962429333065,
+      "learning_rate": 0.0007830000000000001,
+      "loss": 6.3839,
+      "step": 261
+    },
+    {
+      "epoch": 0.00262,
+      "grad_norm": 0.33723485694460476,
+      "learning_rate": 0.000786,
+      "loss": 6.3601,
+      "step": 262
+    },
+    {
+      "epoch": 0.00263,
+      "grad_norm": 0.3792844357509868,
+      "learning_rate": 0.0007890000000000001,
+      "loss": 6.3664,
+      "step": 263
+    },
+    {
+      "epoch": 0.00264,
+      "grad_norm": 0.41999467297545995,
+      "learning_rate": 0.0007920000000000001,
+      "loss": 6.3505,
+      "step": 264
+    },
+    {
+      "epoch": 0.00265,
+      "grad_norm": 0.5058327981670172,
+      "learning_rate": 0.000795,
+      "loss": 6.3427,
+      "step": 265
+    },
+    {
+      "epoch": 0.00266,
+      "grad_norm": 0.728666013298328,
+      "learning_rate": 0.0007980000000000001,
+      "loss": 6.3445,
+      "step": 266
+    },
+    {
+      "epoch": 0.00267,
+      "grad_norm": 1.051597167429085,
+      "learning_rate": 0.0008010000000000001,
+      "loss": 6.3504,
+      "step": 267
+    },
+    {
+      "epoch": 0.00268,
+      "grad_norm": 0.8184600443662068,
+      "learning_rate": 0.000804,
+      "loss": 6.3425,
+      "step": 268
+    },
+    {
+      "epoch": 0.00269,
+      "grad_norm": 0.9044150906220578,
+      "learning_rate": 0.0008070000000000001,
+      "loss": 6.3249,
+      "step": 269
+    },
+    {
+      "epoch": 0.0027,
+      "grad_norm": 1.128589514160359,
+      "learning_rate": 0.0008100000000000001,
+      "loss": 6.3418,
+      "step": 270
+    },
+    {
+      "epoch": 0.00271,
+      "grad_norm": 0.8269681270889044,
+      "learning_rate": 0.000813,
+      "loss": 6.3324,
+      "step": 271
+    },
+    {
+      "epoch": 0.00272,
+      "grad_norm": 0.9605488749490486,
+      "learning_rate": 0.0008160000000000001,
+      "loss": 6.3229,
+      "step": 272
+    },
+    {
+      "epoch": 0.00273,
+      "grad_norm": 1.4256959145221717,
+      "learning_rate": 0.0008190000000000001,
+      "loss": 6.3332,
+      "step": 273
+    },
+    {
+      "epoch": 0.00274,
+      "grad_norm": 0.8746481700595169,
+      "learning_rate": 0.000822,
+      "loss": 6.301,
+      "step": 274
+    },
+    {
+      "epoch": 0.00275,
+      "grad_norm": 0.7483839590071554,
+      "learning_rate": 0.0008250000000000001,
+      "loss": 6.3047,
+      "step": 275
+    },
+    {
+      "epoch": 0.00276,
+      "grad_norm": 0.9683696395132253,
+      "learning_rate": 0.0008280000000000001,
+      "loss": 6.2965,
+      "step": 276
+    },
+    {
+      "epoch": 0.00277,
+      "grad_norm": 0.9445364353770574,
+      "learning_rate": 0.0008310000000000001,
+      "loss": 6.3021,
+      "step": 277
+    },
+    {
+      "epoch": 0.00278,
+      "grad_norm": 0.5637393663463164,
+      "learning_rate": 0.0008340000000000001,
+      "loss": 6.2711,
+      "step": 278
+    },
+    {
+      "epoch": 0.00279,
+      "grad_norm": 0.6474632709262491,
+      "learning_rate": 0.0008370000000000001,
+      "loss": 6.2722,
+      "step": 279
+    },
+    {
+      "epoch": 0.0028,
+      "grad_norm": 0.5477535984985658,
+      "learning_rate": 0.0008400000000000001,
+      "loss": 6.2548,
+      "step": 280
+    },
+    {
+      "epoch": 0.00281,
+      "grad_norm": 0.5235139271800053,
+      "learning_rate": 0.0008430000000000001,
+      "loss": 6.2428,
+      "step": 281
+    },
+    {
+      "epoch": 0.00282,
+      "grad_norm": 0.4309641347762167,
+      "learning_rate": 0.000846,
+      "loss": 6.2382,
+      "step": 282
+    },
+    {
+      "epoch": 0.00283,
+      "grad_norm": 0.47598469419389666,
+      "learning_rate": 0.0008489999999999999,
+      "loss": 6.2321,
+      "step": 283
+    },
+    {
+      "epoch": 0.00284,
+      "grad_norm": 0.41791767912211497,
+      "learning_rate": 0.0008519999999999999,
+      "loss": 6.2199,
+      "step": 284
+    },
+    {
+      "epoch": 0.00285,
+      "grad_norm": 0.4068034669968041,
+      "learning_rate": 0.000855,
+      "loss": 6.2226,
+      "step": 285
+    },
+    {
+      "epoch": 0.00286,
+      "grad_norm": 0.461449105404332,
+      "learning_rate": 0.0008579999999999999,
+      "loss": 6.2004,
+      "step": 286
+    },
+    {
+      "epoch": 0.00287,
+      "grad_norm": 0.3833700707849029,
+      "learning_rate": 0.000861,
+      "loss": 6.2013,
+      "step": 287
+    },
+    {
+      "epoch": 0.00288,
+      "grad_norm": 0.3960645710165419,
+      "learning_rate": 0.000864,
+      "loss": 6.1818,
+      "step": 288
+    },
+    {
+      "epoch": 0.00289,
+      "grad_norm": 0.44431212101949524,
+      "learning_rate": 0.0008669999999999999,
+      "loss": 6.1961,
+      "step": 289
+    },
+    {
+      "epoch": 0.0029,
+      "grad_norm": 0.5861155579490576,
+      "learning_rate": 0.00087,
+      "loss": 6.1786,
+      "step": 290
+    },
+    {
+      "epoch": 0.00291,
+      "grad_norm": 0.6639450218120423,
+      "learning_rate": 0.000873,
+      "loss": 6.1739,
+      "step": 291
+    },
+    {
+      "epoch": 0.00292,
+      "grad_norm": 0.8465664162859037,
+      "learning_rate": 0.0008759999999999999,
+      "loss": 6.1756,
+      "step": 292
+    },
+    {
+      "epoch": 0.00293,
+      "grad_norm": 0.9291009677838105,
+      "learning_rate": 0.000879,
+      "loss": 6.1618,
+      "step": 293
+    },
+    {
+      "epoch": 0.00294,
+      "grad_norm": 1.3082966000676708,
+      "learning_rate": 0.000882,
+      "loss": 6.1833,
+      "step": 294
+    },
+    {
+      "epoch": 0.00295,
+      "grad_norm": 1.1893342417992165,
+      "learning_rate": 0.0008849999999999999,
+      "loss": 6.1586,
+      "step": 295
+    },
+    {
+      "epoch": 0.00296,
+      "grad_norm": 0.6546841726146672,
+      "learning_rate": 0.000888,
+      "loss": 6.1551,
+      "step": 296
+    },
+    {
+      "epoch": 0.00297,
+      "grad_norm": 0.8416245966488266,
+      "learning_rate": 0.000891,
+      "loss": 6.1315,
+      "step": 297
+    },
+    {
+      "epoch": 0.00298,
+      "grad_norm": 0.806301942306892,
+      "learning_rate": 0.0008939999999999999,
+      "loss": 6.1452,
+      "step": 298
+    },
+    {
+      "epoch": 0.00299,
+      "grad_norm": 1.0994469943740992,
+      "learning_rate": 0.000897,
+      "loss": 6.1288,
+      "step": 299
+    },
+    {
+      "epoch": 0.003,
+      "grad_norm": 0.7425213774845364,
+      "learning_rate": 0.0009,
+      "loss": 6.123,
+      "step": 300
+    },
+    {
+      "epoch": 0.00301,
+      "grad_norm": 0.6433032250398036,
+      "learning_rate": 0.0009029999999999999,
+      "loss": 6.1149,
+      "step": 301
+    },
+    {
+      "epoch": 0.00302,
+      "grad_norm": 0.5449990789285414,
+      "learning_rate": 0.000906,
+      "loss": 6.0952,
+      "step": 302
+    },
+    {
+      "epoch": 0.00303,
+      "grad_norm": 0.5356674479383384,
+      "learning_rate": 0.000909,
+      "loss": 6.1067,
+      "step": 303
+    },
+    {
+      "epoch": 0.00304,
+      "grad_norm": 0.5049891146020773,
+      "learning_rate": 0.000912,
+      "loss": 6.1013,
+      "step": 304
+    },
+    {
+      "epoch": 0.00305,
+      "grad_norm": 0.48068374440179407,
+      "learning_rate": 0.000915,
+      "loss": 6.0985,
+      "step": 305
+    },
+    {
+      "epoch": 0.00306,
+      "grad_norm": 0.39305660615027715,
+      "learning_rate": 0.000918,
+      "loss": 6.0835,
+      "step": 306
+    },
+    {
+      "epoch": 0.00307,
+      "grad_norm": 0.4044411514506039,
+      "learning_rate": 0.000921,
+      "loss": 6.0774,
+      "step": 307
+    },
+    {
+      "epoch": 0.00308,
+      "grad_norm": 0.3317383414417003,
+      "learning_rate": 0.000924,
+      "loss": 6.0578,
+      "step": 308
+    },
+    {
+      "epoch": 0.00309,
+      "grad_norm": 0.4170081186564571,
+      "learning_rate": 0.000927,
+      "loss": 6.0741,
+      "step": 309
+    },
+    {
+      "epoch": 0.0031,
+      "grad_norm": 0.3537372632819937,
+      "learning_rate": 0.00093,
+      "loss": 6.0544,
+      "step": 310
+    },
+    {
+      "epoch": 0.00311,
+      "grad_norm": 0.3807795877969161,
+      "learning_rate": 0.000933,
+      "loss": 6.0455,
+      "step": 311
+    },
+    {
+      "epoch": 0.00312,
+      "grad_norm": 0.36301395919450175,
+      "learning_rate": 0.000936,
+      "loss": 6.025,
+      "step": 312
+    },
+    {
+      "epoch": 0.00313,
+      "grad_norm": 0.456154645430038,
+      "learning_rate": 0.0009390000000000001,
+      "loss": 6.0399,
+      "step": 313
+    },
+    {
+      "epoch": 0.00314,
+      "grad_norm": 0.6252307526263491,
+      "learning_rate": 0.000942,
+      "loss": 6.048,
+      "step": 314
+    },
+    {
+      "epoch": 0.00315,
+      "grad_norm": 1.0054536709858048,
+      "learning_rate": 0.000945,
+      "loss": 6.0332,
+      "step": 315
+    },
+    {
+      "epoch": 0.00316,
+      "grad_norm": 1.2671801420052267,
+      "learning_rate": 0.0009480000000000001,
+      "loss": 6.0322,
+      "step": 316
+    },
+    {
+      "epoch": 0.00317,
+      "grad_norm": 0.5267566088823273,
+      "learning_rate": 0.000951,
+      "loss": 6.0198,
+      "step": 317
+    },
+    {
+      "epoch": 0.00318,
+      "grad_norm": 1.2005145600955376,
+      "learning_rate": 0.000954,
+      "loss": 6.041,
+      "step": 318
+    },
+    {
+      "epoch": 0.00319,
+      "grad_norm": 1.004603435172909,
+      "learning_rate": 0.0009570000000000001,
+      "loss": 6.025,
+      "step": 319
+    },
+    {
+      "epoch": 0.0032,
+      "grad_norm": 1.1201777687880106,
+      "learning_rate": 0.00096,
+      "loss": 6.0356,
+      "step": 320
+    },
+    {
+      "epoch": 0.00321,
+      "grad_norm": 1.0167944206203925,
+      "learning_rate": 0.000963,
+      "loss": 6.012,
+      "step": 321
+    },
+    {
+      "epoch": 0.00322,
+      "grad_norm": 1.374478470085335,
+      "learning_rate": 0.0009660000000000001,
+      "loss": 6.025,
+      "step": 322
+    },
+    {
+      "epoch": 0.00323,
+      "grad_norm": 0.7765654772896999,
+      "learning_rate": 0.000969,
+      "loss": 6.0057,
+      "step": 323
+    },
+    {
+      "epoch": 0.00324,
+      "grad_norm": 0.6987857474960232,
+      "learning_rate": 0.0009720000000000001,
+      "loss": 6.0035,
+      "step": 324
+    },
+    {
+      "epoch": 0.00325,
+      "grad_norm": 0.7564069291902213,
+      "learning_rate": 0.0009750000000000001,
+      "loss": 5.9742,
+      "step": 325
+    },
+    {
+      "epoch": 0.00326,
+      "grad_norm": 0.7275905653375951,
+      "learning_rate": 0.0009780000000000001,
+      "loss": 5.9877,
+      "step": 326
+    },
+    {
+      "epoch": 0.00327,
+      "grad_norm": 0.6984989438722892,
+      "learning_rate": 0.000981,
+      "loss": 5.9857,
+      "step": 327
+    },
+    {
+      "epoch": 0.00328,
+      "grad_norm": 0.721382136086564,
+      "learning_rate": 0.000984,
+      "loss": 5.9627,
+      "step": 328
+    },
+    {
+      "epoch": 0.00329,
+      "grad_norm": 0.7841656285031653,
+      "learning_rate": 0.000987,
+      "loss": 5.9649,
+      "step": 329
+    },
+    {
+      "epoch": 0.0033,
+      "grad_norm": 0.8334397806028965,
+      "learning_rate": 0.00099,
+      "loss": 5.9568,
+      "step": 330
+    },
+    {
+      "epoch": 0.00331,
+      "grad_norm": 1.070828806370832,
+      "learning_rate": 0.0009930000000000002,
+      "loss": 5.9642,
+      "step": 331
+    },
+    {
+      "epoch": 0.00332,
+      "grad_norm": 1.0826703064607723,
+      "learning_rate": 0.0009960000000000001,
+      "loss": 5.9538,
+      "step": 332
+    },
+    {
+      "epoch": 0.00333,
+      "grad_norm": 0.9458285226462658,
+      "learning_rate": 0.000999,
+      "loss": 5.9361,
+      "step": 333
+    },
+    {
+      "epoch": 0.00334,
+      "grad_norm": 0.8783572499928525,
+      "learning_rate": 0.001002,
+      "loss": 5.9443,
+      "step": 334
+    },
+    {
+      "epoch": 0.00335,
+      "grad_norm": 0.5654549281403035,
+      "learning_rate": 0.001005,
+      "loss": 5.9354,
+      "step": 335
+    },
+    {
+      "epoch": 0.00336,
+      "grad_norm": 0.6516350129184051,
+      "learning_rate": 0.001008,
+      "loss": 5.9285,
+      "step": 336
+    },
+    {
+      "epoch": 0.00337,
+      "grad_norm": 0.5946351510133966,
+      "learning_rate": 0.0010110000000000002,
+      "loss": 5.9026,
+      "step": 337
+    },
+    {
+      "epoch": 0.00338,
+      "grad_norm": 0.46824568572142555,
+      "learning_rate": 0.0010140000000000001,
+      "loss": 5.9165,
+      "step": 338
+    },
+    {
+      "epoch": 0.00339,
+      "grad_norm": 0.5098867793571775,
+      "learning_rate": 0.0010170000000000001,
+      "loss": 5.9215,
+      "step": 339
+    },
+    {
+      "epoch": 0.0034,
+      "grad_norm": 0.5202026348670868,
+      "learning_rate": 0.00102,
+      "loss": 5.8881,
+      "step": 340
+    },
+    {
+      "epoch": 0.00341,
+      "grad_norm": 0.4748523902202147,
+      "learning_rate": 0.001023,
+      "loss": 5.8705,
+      "step": 341
+    },
+    {
+      "epoch": 0.00342,
+      "grad_norm": 0.42065780247182966,
+      "learning_rate": 0.001026,
+      "loss": 5.8732,
+      "step": 342
+    },
+    {
+      "epoch": 0.00343,
+      "grad_norm": 0.4385189981740614,
+      "learning_rate": 0.0010290000000000002,
+      "loss": 5.8762,
+      "step": 343
+    },
+    {
+      "epoch": 0.00344,
+      "grad_norm": 0.44131626809067537,
+      "learning_rate": 0.001032,
+      "loss": 5.8646,
+      "step": 344
+    },
+    {
+      "epoch": 0.00345,
+      "grad_norm": 0.5602404326473228,
+      "learning_rate": 0.001035,
+      "loss": 5.8576,
+      "step": 345
+    },
+    {
+      "epoch": 0.00346,
+      "grad_norm": 0.7338782884674983,
+      "learning_rate": 0.0010379999999999999,
+      "loss": 5.8684,
+      "step": 346
+    },
+    {
+      "epoch": 0.00347,
+      "grad_norm": 0.8871217127911775,
+      "learning_rate": 0.001041,
+      "loss": 5.8429,
+      "step": 347
+    },
+    {
+      "epoch": 0.00348,
+      "grad_norm": 0.7294005585257834,
+      "learning_rate": 0.001044,
+      "loss": 5.8498,
+      "step": 348
+    },
+    {
+      "epoch": 0.00349,
+      "grad_norm": 0.672488440407315,
+      "learning_rate": 0.001047,
+      "loss": 5.8268,
+      "step": 349
+    },
+    {
+      "epoch": 0.0035,
+      "grad_norm": 0.944274771030096,
+      "learning_rate": 0.00105,
+      "loss": 5.8574,
+      "step": 350
+    },
+    {
+      "epoch": 0.00351,
+      "grad_norm": 0.8431353927439175,
+      "learning_rate": 0.001053,
+      "loss": 5.8586,
+      "step": 351
+    },
+    {
+      "epoch": 0.00352,
+      "grad_norm": 0.8223595555288745,
+      "learning_rate": 0.0010559999999999999,
+      "loss": 5.8326,
+      "step": 352
+    },
+    {
+      "epoch": 0.00353,
+      "grad_norm": 0.7412573085023916,
+      "learning_rate": 0.001059,
+      "loss": 5.8336,
+      "step": 353
+    },
+    {
+      "epoch": 0.00354,
+      "grad_norm": 0.7219192917744488,
+      "learning_rate": 0.001062,
+      "loss": 5.8047,
+      "step": 354
+    },
+    {
+      "epoch": 0.00355,
+      "grad_norm": 0.7332482461779326,
+      "learning_rate": 0.001065,
+      "loss": 5.8213,
+      "step": 355
+    },
+    {
+      "epoch": 0.00356,
+      "grad_norm": 0.6300796792438244,
+      "learning_rate": 0.001068,
+      "loss": 5.8096,
+      "step": 356
+    },
+    {
+      "epoch": 0.00357,
+      "grad_norm": 0.7796926368866208,
+      "learning_rate": 0.001071,
+      "loss": 5.802,
+      "step": 357
+    },
+    {
+      "epoch": 0.00358,
+      "grad_norm": 1.048118676098695,
+      "learning_rate": 0.001074,
+      "loss": 5.8089,
+      "step": 358
+    },
+    {
+      "epoch": 0.00359,
+      "grad_norm": 0.8469283052653666,
+      "learning_rate": 0.001077,
+      "loss": 5.7908,
+      "step": 359
+    },
+    {
+      "epoch": 0.0036,
+      "grad_norm": 0.824969051981288,
+      "learning_rate": 0.00108,
+      "loss": 5.7948,
+      "step": 360
+    },
+    {
+      "epoch": 0.00361,
+      "grad_norm": 0.8007424119602606,
+      "learning_rate": 0.001083,
+      "loss": 5.7876,
+      "step": 361
+    },
+    {
+      "epoch": 0.00362,
+      "grad_norm": 0.7740442530973947,
+      "learning_rate": 0.001086,
+      "loss": 5.7713,
+      "step": 362
+    },
+    {
+      "epoch": 0.00363,
+      "grad_norm": 0.8217790100779416,
+      "learning_rate": 0.001089,
+      "loss": 5.7673,
+      "step": 363
+    },
+    {
+      "epoch": 0.00364,
+      "grad_norm": 0.9222078053619298,
+      "learning_rate": 0.001092,
+      "loss": 5.7813,
+      "step": 364
+    },
+    {
+      "epoch": 0.00365,
+      "grad_norm": 1.3725693861639678,
+      "learning_rate": 0.001095,
+      "loss": 5.801,
+      "step": 365
+    },
+    {
+      "epoch": 0.00366,
+      "grad_norm": 0.7809021294214551,
+      "learning_rate": 0.001098,
+      "loss": 5.7587,
+      "step": 366
+    },
+    {
+      "epoch": 0.00367,
+      "grad_norm": 1.1843193874441642,
+      "learning_rate": 0.001101,
+      "loss": 5.7731,
+      "step": 367
+    },
+    {
+      "epoch": 0.00368,
+      "grad_norm": 0.7875897064576857,
+      "learning_rate": 0.001104,
+      "loss": 5.7636,
+      "step": 368
+    },
+    {
+      "epoch": 0.00369,
+      "grad_norm": 0.6858717859751121,
+      "learning_rate": 0.001107,
+      "loss": 5.7637,
+      "step": 369
+    },
+    {
+      "epoch": 0.0037,
+      "grad_norm": 0.6252688126791845,
+      "learning_rate": 0.00111,
+      "loss": 5.7428,
+      "step": 370
+    },
+    {
+      "epoch": 0.00371,
+      "grad_norm": 0.6219765776345385,
+      "learning_rate": 0.001113,
+      "loss": 5.7116,
+      "step": 371
+    },
+    {
+      "epoch": 0.00372,
+      "grad_norm": 0.5998751494739524,
+      "learning_rate": 0.001116,
+      "loss": 5.7331,
+      "step": 372
+    },
+    {
+      "epoch": 0.00373,
+      "grad_norm": 0.5714053922949759,
+      "learning_rate": 0.001119,
+      "loss": 5.72,
+      "step": 373
+    },
+    {
+      "epoch": 0.00374,
+      "grad_norm": 0.628102864915935,
+      "learning_rate": 0.001122,
+      "loss": 5.7262,
+      "step": 374
+    },
+    {
+      "epoch": 0.00375,
+      "grad_norm": 0.5909991857975568,
+      "learning_rate": 0.0011250000000000001,
+      "loss": 5.7276,
+      "step": 375
+    },
+    {
+      "epoch": 0.00376,
+      "grad_norm": 0.5216592986380127,
+      "learning_rate": 0.001128,
+      "loss": 5.72,
+      "step": 376
+    },
+    {
+      "epoch": 0.00377,
+      "grad_norm": 0.5469626358369448,
+      "learning_rate": 0.001131,
+      "loss": 5.7145,
+      "step": 377
+    },
+    {
+      "epoch": 0.00378,
+      "grad_norm": 0.6291207985788981,
+      "learning_rate": 0.001134,
+      "loss": 5.7123,
+      "step": 378
+    },
+    {
+      "epoch": 0.00379,
+      "grad_norm": 0.6445779804368097,
+      "learning_rate": 0.001137,
+      "loss": 5.698,
+      "step": 379
+    },
+    {
+      "epoch": 0.0038,
+      "grad_norm": 0.7034810438425685,
+      "learning_rate": 0.00114,
+      "loss": 5.7177,
+      "step": 380
+    },
+    {
+      "epoch": 0.00381,
+      "grad_norm": 0.9330449756794958,
+      "learning_rate": 0.0011430000000000001,
+      "loss": 5.6924,
+      "step": 381
+    },
+    {
+      "epoch": 0.00382,
+      "grad_norm": 0.9449582673655755,
+      "learning_rate": 0.001146,
+      "loss": 5.6863,
+      "step": 382
+    },
+    {
+      "epoch": 0.00383,
+      "grad_norm": 0.6715143882341864,
+      "learning_rate": 0.001149,
+      "loss": 5.6846,
+      "step": 383
+    },
+    {
+      "epoch": 0.00384,
+      "grad_norm": 0.8662082921806001,
+      "learning_rate": 0.001152,
+      "loss": 5.699,
+      "step": 384
+    },
+    {
+      "epoch": 0.00385,
+      "grad_norm": 0.8482542645994808,
+      "learning_rate": 0.001155,
+      "loss": 5.6818,
+      "step": 385
+    },
+    {
+      "epoch": 0.00386,
+      "grad_norm": 0.8694120663581818,
+      "learning_rate": 0.001158,
+      "loss": 5.6914,
+      "step": 386
+    },
+    {
+      "epoch": 0.00387,
+      "grad_norm": 1.382803938865885,
+      "learning_rate": 0.0011610000000000001,
+      "loss": 5.6813,
+      "step": 387
+    },
+    {
+      "epoch": 0.00388,
+      "grad_norm": 0.9644228724095026,
+      "learning_rate": 0.001164,
+      "loss": 5.6689,
+      "step": 388
+    },
+    {
+      "epoch": 0.00389,
+      "grad_norm": 0.7396617808819147,
+      "learning_rate": 0.001167,
+      "loss": 5.6709,
+      "step": 389
+    },
+    {
+      "epoch": 0.0039,
+      "grad_norm": 0.5609185173455993,
+      "learning_rate": 0.00117,
+      "loss": 5.648,
+      "step": 390
+    },
+    {
+      "epoch": 0.00391,
+      "grad_norm": 0.735416794156057,
+      "learning_rate": 0.001173,
+      "loss": 5.6589,
+      "step": 391
+    },
+    {
+      "epoch": 0.00392,
+      "grad_norm": 0.7564046131732848,
+      "learning_rate": 0.001176,
+      "loss": 5.6662,
+      "step": 392
+    },
+    {
+      "epoch": 0.00393,
+      "grad_norm": 0.7233997204719024,
+      "learning_rate": 0.0011790000000000001,
+      "loss": 5.6377,
+      "step": 393
+    },
+    {
+      "epoch": 0.00394,
+      "grad_norm": 0.6288246845478384,
+      "learning_rate": 0.001182,
+      "loss": 5.6239,
+      "step": 394
+    },
+    {
+      "epoch": 0.00395,
+      "grad_norm": 0.6696319293361586,
+      "learning_rate": 0.001185,
+      "loss": 5.6102,
+      "step": 395
+    },
+    {
+      "epoch": 0.00396,
+      "grad_norm": 0.7214553632515296,
+      "learning_rate": 0.001188,
+      "loss": 5.6276,
+      "step": 396
+    },
+    {
+      "epoch": 0.00397,
+      "grad_norm": 0.7104651338358826,
+      "learning_rate": 0.001191,
+      "loss": 5.619,
+      "step": 397
+    },
+    {
+      "epoch": 0.00398,
+      "grad_norm": 0.5955487966253655,
+      "learning_rate": 0.0011940000000000002,
+      "loss": 5.6195,
+      "step": 398
+    },
+    {
+      "epoch": 0.00399,
+      "grad_norm": 0.6061151049974988,
+      "learning_rate": 0.0011970000000000001,
+      "loss": 5.6211,
+      "step": 399
+    },
+    {
+      "epoch": 0.004,
+      "grad_norm": 0.747975845260549,
+      "learning_rate": 0.0012000000000000001,
+      "loss": 5.6119,
+      "step": 400
+    },
+    {
+      "epoch": 0.00401,
+      "grad_norm": 1.066378470212407,
+      "learning_rate": 0.001203,
+      "loss": 5.6055,
+      "step": 401
+    },
+    {
+      "epoch": 0.00402,
+      "grad_norm": 1.1521859370487337,
+      "learning_rate": 0.001206,
+      "loss": 5.6144,
+      "step": 402
+    },
+    {
+      "epoch": 0.00403,
+      "grad_norm": 1.0137206411467539,
+      "learning_rate": 0.001209,
+      "loss": 5.6121,
+      "step": 403
+    },
+    {
+      "epoch": 0.00404,
+      "grad_norm": 1.1663989264954397,
+      "learning_rate": 0.0012120000000000002,
+      "loss": 5.6177,
+      "step": 404
+    },
+    {
+      "epoch": 0.00405,
+      "grad_norm": 1.1363050593119097,
+      "learning_rate": 0.0012150000000000002,
+      "loss": 5.6098,
+      "step": 405
+    },
+    {
+      "epoch": 0.00406,
+      "grad_norm": 1.0087910495777368,
+      "learning_rate": 0.0012180000000000001,
+      "loss": 5.5952,
+      "step": 406
+    },
+    {
+      "epoch": 0.00407,
+      "grad_norm": 0.9520738669869687,
+      "learning_rate": 0.0012209999999999999,
+      "loss": 5.5915,
+      "step": 407
+    },
+    {
+      "epoch": 0.00408,
+      "grad_norm": 1.0377478362348644,
+      "learning_rate": 0.001224,
+      "loss": 5.6129,
+      "step": 408
+    },
+    {
+      "epoch": 0.00409,
+      "grad_norm": 1.1016437762309168,
+      "learning_rate": 0.001227,
+      "loss": 5.5927,
+      "step": 409
+    },
+    {
+      "epoch": 0.0041,
+      "grad_norm": 0.9709841842795465,
+      "learning_rate": 0.00123,
+      "loss": 5.5883,
+      "step": 410
+    },
+    {
+      "epoch": 0.00411,
+      "grad_norm": 0.8240534275873757,
+      "learning_rate": 0.001233,
+      "loss": 5.5977,
+      "step": 411
+    },
+    {
+      "epoch": 0.00412,
+      "grad_norm": 0.8077831494606339,
+      "learning_rate": 0.001236,
+      "loss": 5.5894,
+      "step": 412
+    },
+    {
+      "epoch": 0.00413,
+      "grad_norm": 0.7433003641205871,
+      "learning_rate": 0.0012389999999999999,
+      "loss": 5.5815,
+      "step": 413
+    },
+    {
+      "epoch": 0.00414,
+      "grad_norm": 0.615571577509929,
+      "learning_rate": 0.001242,
+      "loss": 5.5582,
+      "step": 414
+    },
+    {
+      "epoch": 0.00415,
+      "grad_norm": 0.656841702591939,
+      "learning_rate": 0.001245,
+      "loss": 5.5606,
+      "step": 415
+    },
+    {
+      "epoch": 0.00416,
+      "grad_norm": 0.613196865035469,
+      "learning_rate": 0.001248,
+      "loss": 5.5418,
+      "step": 416
+    },
+    {
+      "epoch": 0.00417,
+      "grad_norm": 0.48631699887544017,
+      "learning_rate": 0.001251,
+      "loss": 5.5683,
+      "step": 417
+    },
+    {
+      "epoch": 0.00418,
+      "grad_norm": 0.4611320175617472,
+      "learning_rate": 0.001254,
+      "loss": 5.5401,
+      "step": 418
+    },
+    {
+      "epoch": 0.00419,
+      "grad_norm": 0.5824396705507784,
+      "learning_rate": 0.0012569999999999999,
+      "loss": 5.5305,
+      "step": 419
+    },
+    {
+      "epoch": 0.0042,
+      "grad_norm": 0.8055087238093066,
+      "learning_rate": 0.00126,
+      "loss": 5.5467,
+      "step": 420
+    },
+    {
+      "epoch": 0.00421,
+      "grad_norm": 0.9831917562206494,
+      "learning_rate": 0.001263,
+      "loss": 5.5503,
+      "step": 421
+    },
+    {
+      "epoch": 0.00422,
+      "grad_norm": 0.8124592037579013,
+      "learning_rate": 0.001266,
+      "loss": 5.5357,
+      "step": 422
+    },
+    {
+      "epoch": 0.00423,
+      "grad_norm": 1.0116811247233115,
+      "learning_rate": 0.001269,
+      "loss": 5.5337,
+      "step": 423
+    },
+    {
+      "epoch": 0.00424,
+      "grad_norm": 1.368200107021336,
+      "learning_rate": 0.001272,
+      "loss": 5.5595,
+      "step": 424
+    },
+    {
+      "epoch": 0.00425,
+      "grad_norm": 0.9353292964204464,
+      "learning_rate": 0.001275,
+      "loss": 5.5445,
+      "step": 425
+    },
+    {
+      "epoch": 0.00426,
+      "grad_norm": 0.9604600896045714,
+      "learning_rate": 0.001278,
+      "loss": 5.5493,
+      "step": 426
+    },
+    {
+      "epoch": 0.00427,
+      "grad_norm": 0.9684632680550875,
+      "learning_rate": 0.001281,
+      "loss": 5.5328,
+      "step": 427
+    },
+    {
+      "epoch": 0.00428,
+      "grad_norm": 0.9533788870858234,
+      "learning_rate": 0.001284,
+      "loss": 5.5058,
+      "step": 428
+    },
+    {
+      "epoch": 0.00429,
+      "grad_norm": 1.113271451738206,
+      "learning_rate": 0.001287,
+      "loss": 5.5322,
+      "step": 429
+    },
+    {
+      "epoch": 0.0043,
+      "grad_norm": 0.9492520989573756,
+      "learning_rate": 0.00129,
+      "loss": 5.5226,
+      "step": 430
+    },
+    {
+      "epoch": 0.00431,
+      "grad_norm": 1.1309185966069144,
+      "learning_rate": 0.001293,
+      "loss": 5.5118,
+      "step": 431
+    },
+    {
+      "epoch": 0.00432,
+      "grad_norm": 1.2851571092183034,
+      "learning_rate": 0.001296,
+      "loss": 5.5367,
+      "step": 432
+    },
+    {
+      "epoch": 0.00433,
+      "grad_norm": 0.7219415163940603,
+      "learning_rate": 0.001299,
+      "loss": 5.5106,
+      "step": 433
+    },
+    {
+      "epoch": 0.00434,
+      "grad_norm": 0.7943786167833424,
+      "learning_rate": 0.001302,
+      "loss": 5.5109,
+      "step": 434
+    },
+    {
+      "epoch": 0.00435,
+      "grad_norm": 0.7514833526497924,
+      "learning_rate": 0.001305,
+      "loss": 5.5051,
+      "step": 435
+    },
+    {
+      "epoch": 0.00436,
+      "grad_norm": 0.5499552719714149,
+      "learning_rate": 0.001308,
+      "loss": 5.4758,
+      "step": 436
+    },
+    {
+      "epoch": 0.00437,
+      "grad_norm": 0.5803132978630802,
+      "learning_rate": 0.001311,
+      "loss": 5.494,
+      "step": 437
+    },
+    {
+      "epoch": 0.00438,
+      "grad_norm": 0.4343754614294673,
+      "learning_rate": 0.001314,
+      "loss": 5.4701,
+      "step": 438
+    },
+    {
+      "epoch": 0.00439,
+      "grad_norm": 0.49450539118744047,
+      "learning_rate": 0.001317,
+      "loss": 5.4997,
+      "step": 439
+    },
+    {
+      "epoch": 0.0044,
+      "grad_norm": 0.3670496641121159,
+      "learning_rate": 0.00132,
+      "loss": 5.4633,
+      "step": 440
+    },
+    {
+      "epoch": 0.00441,
+      "grad_norm": 0.44023069828614625,
+      "learning_rate": 0.001323,
+      "loss": 5.4624,
+      "step": 441
+    },
+    {
+      "epoch": 0.00442,
+      "grad_norm": 0.45124022563783683,
+      "learning_rate": 0.0013260000000000001,
+      "loss": 5.4541,
+      "step": 442
+    },
+    {
+      "epoch": 0.00443,
+      "grad_norm": 0.5170657075032803,
+      "learning_rate": 0.001329,
+      "loss": 5.4572,
+      "step": 443
+    },
+    {
+      "epoch": 0.00444,
+      "grad_norm": 0.6406930762525409,
+      "learning_rate": 0.001332,
+      "loss": 5.4608,
+      "step": 444
+    },
+    {
+      "epoch": 0.00445,
+      "grad_norm": 0.7431205120059805,
+      "learning_rate": 0.001335,
+      "loss": 5.4462,
+      "step": 445
+    },
+    {
+      "epoch": 0.00446,
+      "grad_norm": 0.9849435734941417,
+      "learning_rate": 0.001338,
+      "loss": 5.4578,
+      "step": 446
+    },
+    {
+      "epoch": 0.00447,
+      "grad_norm": 1.1031854356255555,
+      "learning_rate": 0.001341,
+      "loss": 5.469,
+      "step": 447
+    },
+    {
+      "epoch": 0.00448,
+      "grad_norm": 0.8550264287932019,
+      "learning_rate": 0.0013440000000000001,
+      "loss": 5.4411,
+      "step": 448
+    },
+    {
+      "epoch": 0.00449,
+      "grad_norm": 0.8569858280927948,
+      "learning_rate": 0.001347,
+      "loss": 5.4491,
+      "step": 449
+    },
+    {
+      "epoch": 0.0045,
+      "grad_norm": 0.766625402040655,
+      "learning_rate": 0.00135,
+      "loss": 5.4461,
+      "step": 450
+    },
+    {
+      "epoch": 0.00451,
+      "grad_norm": 0.8992860281272513,
+      "learning_rate": 0.001353,
+      "loss": 5.4411,
+      "step": 451
+    },
+    {
+      "epoch": 0.00452,
+      "grad_norm": 0.9587820940422671,
+      "learning_rate": 0.001356,
+      "loss": 5.4273,
+      "step": 452
+    },
+    {
+      "epoch": 0.00453,
+      "grad_norm": 0.9706881368414635,
+      "learning_rate": 0.001359,
+      "loss": 5.4204,
+      "step": 453
+    },
+    {
+      "epoch": 0.00454,
+      "grad_norm": 1.1942708782025127,
+      "learning_rate": 0.0013620000000000001,
+      "loss": 5.4481,
+      "step": 454
+    },
+    {
+      "epoch": 0.00455,
+      "grad_norm": 1.0651260215062406,
+      "learning_rate": 0.0013650000000000001,
+      "loss": 5.4483,
+      "step": 455
+    },
+    {
+      "epoch": 0.00456,
+      "grad_norm": 1.0894309440916736,
+      "learning_rate": 0.001368,
+      "loss": 5.4405,
+      "step": 456
+    },
+    {
+      "epoch": 0.00457,
+      "grad_norm": 1.3632436316792311,
+      "learning_rate": 0.001371,
+      "loss": 5.442,
+      "step": 457
+    },
+    {
+      "epoch": 0.00458,
+      "grad_norm": 0.9802910013598249,
+      "learning_rate": 0.001374,
+      "loss": 5.4423,
+      "step": 458
+    },
+    {
+      "epoch": 0.00459,
+      "grad_norm": 0.9681577835390196,
+      "learning_rate": 0.0013770000000000002,
+      "loss": 5.4314,
+      "step": 459
+    },
+    {
+      "epoch": 0.0046,
+      "grad_norm": 0.955881976726698,
+      "learning_rate": 0.0013800000000000002,
+      "loss": 5.4146,
+      "step": 460
+    },
+    {
+      "epoch": 0.00461,
+      "grad_norm": 1.072138092727722,
+      "learning_rate": 0.0013830000000000001,
+      "loss": 5.4186,
+      "step": 461
+    },
+    {
+      "epoch": 0.00462,
+      "grad_norm": 0.8672451154734739,
+      "learning_rate": 0.001386,
+      "loss": 5.4193,
+      "step": 462
+    },
+    {
+      "epoch": 0.00463,
+      "grad_norm": 0.9908998654710571,
+      "learning_rate": 0.001389,
+      "loss": 5.4011,
+      "step": 463
+    },
+    {
+      "epoch": 0.00464,
+      "grad_norm": 1.1599842863140275,
+      "learning_rate": 0.001392,
+      "loss": 5.4321,
+      "step": 464
+    },
+    {
+      "epoch": 0.00465,
+      "grad_norm": 0.698212811337685,
+      "learning_rate": 0.0013950000000000002,
+      "loss": 5.3861,
+      "step": 465
+    },
+    {
+      "epoch": 0.00466,
+      "grad_norm": 0.6209828526031017,
+      "learning_rate": 0.0013980000000000002,
+      "loss": 5.4004,
+      "step": 466
+    },
+    {
+      "epoch": 0.00467,
+      "grad_norm": 0.626908730655191,
+      "learning_rate": 0.0014010000000000001,
+      "loss": 5.3894,
+      "step": 467
+    },
+    {
+      "epoch": 0.00468,
+      "grad_norm": 0.6046317726003113,
+      "learning_rate": 0.001404,
+      "loss": 5.3787,
+      "step": 468
+    },
+    {
+      "epoch": 0.00469,
+      "grad_norm": 0.5283854127913247,
+      "learning_rate": 0.001407,
+      "loss": 5.3767,
+      "step": 469
+    },
+    {
+      "epoch": 0.0047,
+      "grad_norm": 0.5494342704278756,
+      "learning_rate": 0.00141,
+      "loss": 5.3859,
+      "step": 470
+    },
+    {
+      "epoch": 0.00471,
+      "grad_norm": 0.6548172150181142,
+      "learning_rate": 0.001413,
+      "loss": 5.3653,
+      "step": 471
+    },
+    {
+      "epoch": 0.00472,
+      "grad_norm": 0.8352552132751145,
+      "learning_rate": 0.001416,
+      "loss": 5.3799,
+      "step": 472
+    },
+    {
+      "epoch": 0.00473,
+      "grad_norm": 1.04775859687285,
+      "learning_rate": 0.001419,
+      "loss": 5.3985,
+      "step": 473
+    },
+    {
+      "epoch": 0.00474,
+      "grad_norm": 0.9808295146566943,
+      "learning_rate": 0.0014219999999999999,
+      "loss": 5.3787,
+      "step": 474
+    },
+    {
+      "epoch": 0.00475,
+      "grad_norm": 1.0065096729700158,
+      "learning_rate": 0.001425,
+      "loss": 5.3725,
+      "step": 475
+    },
+    {
+      "epoch": 0.00476,
+      "grad_norm": 1.0604383614131039,
+      "learning_rate": 0.001428,
+      "loss": 5.4172,
+      "step": 476
+    },
+    {
+      "epoch": 0.00477,
+      "grad_norm": 0.5551620314639825,
+      "learning_rate": 0.001431,
+      "loss": 5.3484,
+      "step": 477
+    },
+    {
+      "epoch": 0.00478,
+      "grad_norm": 0.6446758952809472,
+      "learning_rate": 0.001434,
+      "loss": 5.3819,
+      "step": 478
+    },
+    {
+      "epoch": 0.00479,
+      "grad_norm": 0.6811842486215005,
+      "learning_rate": 0.001437,
+      "loss": 5.3551,
+      "step": 479
+    },
+    {
+      "epoch": 0.0048,
+      "grad_norm": 0.6985415352827566,
+      "learning_rate": 0.0014399999999999999,
+      "loss": 5.3502,
+      "step": 480
+    },
+    {
+      "epoch": 0.00481,
+      "grad_norm": 0.7430626707063542,
+      "learning_rate": 0.001443,
+      "loss": 5.3537,
+      "step": 481
+    },
+    {
+      "epoch": 0.00482,
+      "grad_norm": 0.7883555304691023,
+      "learning_rate": 0.001446,
+      "loss": 5.3281,
+      "step": 482
+    },
+    {
+      "epoch": 0.00483,
+      "grad_norm": 0.9604272864715826,
+      "learning_rate": 0.001449,
+      "loss": 5.3455,
+      "step": 483
+    },
+    {
+      "epoch": 0.00484,
+      "grad_norm": 0.9435245256880889,
+      "learning_rate": 0.001452,
+      "loss": 5.3494,
+      "step": 484
+    },
+    {
+      "epoch": 0.00485,
+      "grad_norm": 0.8042883678590446,
+      "learning_rate": 0.001455,
+      "loss": 5.3511,
+      "step": 485
+    },
+    {
+      "epoch": 0.00486,
+      "grad_norm": 0.7958734942371152,
+      "learning_rate": 0.001458,
+      "loss": 5.3247,
+      "step": 486
+    },
+    {
+      "epoch": 0.00487,
+      "grad_norm": 0.7735191187819994,
+      "learning_rate": 0.001461,
+      "loss": 5.3397,
+      "step": 487
+    },
+    {
+      "epoch": 0.00488,
+      "grad_norm": 1.012081783792506,
+      "learning_rate": 0.001464,
+      "loss": 5.3265,
+      "step": 488
+    },
+    {
+      "epoch": 0.00489,
+      "grad_norm": 0.8905045147335564,
+      "learning_rate": 0.001467,
+      "loss": 5.3131,
+      "step": 489
+    },
+    {
+      "epoch": 0.0049,
+      "grad_norm": 0.8898357576674187,
+      "learning_rate": 0.00147,
+      "loss": 5.3259,
+      "step": 490
+    },
+    {
+      "epoch": 0.00491,
+      "grad_norm": 0.9415043482581119,
+      "learning_rate": 0.001473,
+      "loss": 5.3269,
+      "step": 491
+    },
+    {
+      "epoch": 0.00492,
+      "grad_norm": 0.8320755719970162,
+      "learning_rate": 0.001476,
+      "loss": 5.3196,
+      "step": 492
+    },
+    {
+      "epoch": 0.00493,
+      "grad_norm": 0.8445852574519659,
+      "learning_rate": 0.001479,
+      "loss": 5.3145,
+      "step": 493
+    },
+    {
+      "epoch": 0.00494,
+      "grad_norm": 1.055556747775556,
+      "learning_rate": 0.001482,
+      "loss": 5.3274,
+      "step": 494
+    },
+    {
+      "epoch": 0.00495,
+      "grad_norm": 0.8469107680360277,
+      "learning_rate": 0.001485,
+      "loss": 5.3268,
+      "step": 495
+    },
+    {
+      "epoch": 0.00496,
+      "grad_norm": 0.7635320378283988,
+      "learning_rate": 0.001488,
+      "loss": 5.3083,
+      "step": 496
+    },
+    {
+      "epoch": 0.00497,
+      "grad_norm": 0.8508130432624411,
+      "learning_rate": 0.001491,
+      "loss": 5.2927,
+      "step": 497
+    },
+    {
+      "epoch": 0.00498,
+      "grad_norm": 0.6847944056515368,
+      "learning_rate": 0.001494,
+      "loss": 5.3134,
+      "step": 498
+    },
+    {
+      "epoch": 0.00499,
+      "grad_norm": 0.73441383592788,
+      "learning_rate": 0.001497,
+      "loss": 5.2894,
+      "step": 499
+    },
+    {
+      "epoch": 0.005,
+      "grad_norm": 0.6599139353534871,
+      "learning_rate": 0.0015,
+      "loss": 5.2898,
+      "step": 500
+    },
+    {
+      "epoch": 0.00501,
+      "grad_norm": 0.5852764030687648,
+      "learning_rate": 0.001503,
+      "loss": 5.2881,
+      "step": 501
+    },
+    {
+      "epoch": 0.00502,
+      "grad_norm": 0.6853021422001514,
+      "learning_rate": 0.001506,
+      "loss": 5.2946,
+      "step": 502
+    },
+    {
+      "epoch": 0.00503,
+      "grad_norm": 0.720141729806561,
+      "learning_rate": 0.0015090000000000001,
+      "loss": 5.2988,
+      "step": 503
+    },
+    {
+      "epoch": 0.00504,
+      "grad_norm": 0.7571759373525393,
+      "learning_rate": 0.001512,
+      "loss": 5.2829,
+      "step": 504
+    },
+    {
+      "epoch": 0.00505,
+      "grad_norm": 0.7892633543044816,
+      "learning_rate": 0.001515,
+      "loss": 5.2795,
+      "step": 505
+    },
+    {
+      "epoch": 0.00506,
+      "grad_norm": 0.7628738296895545,
+      "learning_rate": 0.001518,
+      "loss": 5.2778,
+      "step": 506
+    },
+    {
+      "epoch": 0.00507,
+      "grad_norm": 0.7334519380566521,
+      "learning_rate": 0.001521,
+      "loss": 5.2722,
+      "step": 507
+    },
+    {
+      "epoch": 0.00508,
+      "grad_norm": 0.7350882061617702,
+      "learning_rate": 0.001524,
+      "loss": 5.2693,
+      "step": 508
+    },
+    {
+      "epoch": 0.00509,
+      "grad_norm": 0.7407389039658853,
+      "learning_rate": 0.0015270000000000001,
+      "loss": 5.2562,
+      "step": 509
+    },
+    {
+      "epoch": 0.0051,
+      "grad_norm": 0.7104400166661097,
+      "learning_rate": 0.0015300000000000001,
+      "loss": 5.2696,
+      "step": 510
+    },
+    {
+      "epoch": 0.00511,
+      "grad_norm": 0.8484400275698938,
+      "learning_rate": 0.001533,
+      "loss": 5.2541,
+      "step": 511
+    },
+    {
+      "epoch": 0.00512,
+      "grad_norm": 0.7909533536283434,
+      "learning_rate": 0.001536,
+      "loss": 5.2475,
+      "step": 512
+    },
+    {
+      "epoch": 0.00513,
+      "grad_norm": 0.9481517521047255,
+      "learning_rate": 0.001539,
+      "loss": 5.2684,
+      "step": 513
+    },
+    {
+      "epoch": 0.00514,
+      "grad_norm": 1.405665610624422,
+      "learning_rate": 0.001542,
+      "loss": 5.2742,
+      "step": 514
+    },
+    {
+      "epoch": 0.00515,
+      "grad_norm": 0.9237050984729354,
+      "learning_rate": 0.0015450000000000001,
+      "loss": 5.2636,
+      "step": 515
+    },
+    {
+      "epoch": 0.00516,
+      "grad_norm": 1.1634719669204763,
+      "learning_rate": 0.0015480000000000001,
+      "loss": 5.2755,
+      "step": 516
+    },
+    {
+      "epoch": 0.00517,
+      "grad_norm": 0.8585760471561034,
+      "learning_rate": 0.001551,
+      "loss": 5.2577,
+      "step": 517
+    },
+    {
+      "epoch": 0.00518,
+      "grad_norm": 0.728905665920263,
+      "learning_rate": 0.001554,
+      "loss": 5.2422,
+      "step": 518
+    },
+    {
+      "epoch": 0.00519,
+      "grad_norm": 0.841003643450708,
+      "learning_rate": 0.001557,
+      "loss": 5.2472,
+      "step": 519
+    },
+    {
+      "epoch": 0.0052,
+      "grad_norm": 0.8542329179478354,
+      "learning_rate": 0.0015600000000000002,
+      "loss": 5.2573,
+      "step": 520
+    },
+    {
+      "epoch": 0.00521,
+      "grad_norm": 0.9136344705581151,
+      "learning_rate": 0.0015630000000000002,
+      "loss": 5.2327,
+      "step": 521
+    },
+    {
+      "epoch": 0.00522,
+      "grad_norm": 1.1142541660381047,
+      "learning_rate": 0.0015660000000000001,
+      "loss": 5.2435,
+      "step": 522
+    },
+    {
+      "epoch": 0.00523,
+      "grad_norm": 1.2424782927197504,
+      "learning_rate": 0.001569,
+      "loss": 5.2642,
+      "step": 523
+    },
+    {
+      "epoch": 0.00524,
+      "grad_norm": 0.9564855927553343,
+      "learning_rate": 0.001572,
+      "loss": 5.264,
+      "step": 524
+    },
+    {
+      "epoch": 0.00525,
+      "grad_norm": 0.8662515649518988,
+      "learning_rate": 0.001575,
+      "loss": 5.2537,
+      "step": 525
+    },
+    {
+      "epoch": 0.00526,
+      "grad_norm": 0.72079723918141,
+      "learning_rate": 0.0015780000000000002,
+      "loss": 5.2224,
+      "step": 526
+    },
+    {
+      "epoch": 0.00527,
+      "grad_norm": 0.6822789926865414,
+      "learning_rate": 0.0015810000000000002,
+      "loss": 5.2353,
+      "step": 527
+    },
+    {
+      "epoch": 0.00528,
+      "grad_norm": 0.6986034229049856,
+      "learning_rate": 0.0015840000000000001,
+      "loss": 5.2128,
+      "step": 528
+    },
+    {
+      "epoch": 0.00529,
+      "grad_norm": 0.6935526974121139,
+      "learning_rate": 0.001587,
+      "loss": 5.2236,
+      "step": 529
+    },
+    {
+      "epoch": 0.0053,
+      "grad_norm": 0.6635846542220551,
+      "learning_rate": 0.00159,
+      "loss": 5.2302,
+      "step": 530
+    },
+    {
+      "epoch": 0.00531,
+      "grad_norm": 0.7197123783695031,
+      "learning_rate": 0.001593,
+      "loss": 5.2167,
+      "step": 531
+    },
+    {
+      "epoch": 0.00532,
+      "grad_norm": 0.6862921713046577,
+      "learning_rate": 0.0015960000000000002,
+      "loss": 5.2205,
+      "step": 532
+    },
+    {
+      "epoch": 0.00533,
+      "grad_norm": 0.672254585940336,
+      "learning_rate": 0.0015990000000000002,
+      "loss": 5.2165,
+      "step": 533
+    },
+    {
+      "epoch": 0.00534,
+      "grad_norm": 0.6180247915886188,
+      "learning_rate": 0.0016020000000000001,
+      "loss": 5.2018,
+      "step": 534
+    },
+    {
+      "epoch": 0.00535,
+      "grad_norm": 0.7076887136252737,
+      "learning_rate": 0.001605,
+      "loss": 5.2099,
+      "step": 535
+    },
+    {
+      "epoch": 0.00536,
+      "grad_norm": 0.8627381010586813,
+      "learning_rate": 0.001608,
+      "loss": 5.2158,
+      "step": 536
+    },
+    {
+      "epoch": 0.00537,
+      "grad_norm": 0.9890089503230703,
+      "learning_rate": 0.0016110000000000002,
+      "loss": 5.2125,
+      "step": 537
+    },
+    {
+      "epoch": 0.00538,
+      "grad_norm": 1.0890684861329838,
+      "learning_rate": 0.0016140000000000002,
+      "loss": 5.1997,
+      "step": 538
+    },
+    {
+      "epoch": 0.00539,
+      "grad_norm": 0.7898695514456295,
+      "learning_rate": 0.0016170000000000002,
+      "loss": 5.1885,
+      "step": 539
+    },
+    {
+      "epoch": 0.0054,
+      "grad_norm": 0.7877858658336557,
+      "learning_rate": 0.0016200000000000001,
+      "loss": 5.1979,
+      "step": 540
+    },
+    {
+      "epoch": 0.00541,
+      "grad_norm": 0.8613625325852945,
+      "learning_rate": 0.001623,
+      "loss": 5.1884,
+      "step": 541
+    },
+    {
+      "epoch": 0.00542,
+      "grad_norm": 0.9310959295325639,
+      "learning_rate": 0.001626,
+      "loss": 5.2135,
+      "step": 542
+    },
+    {
+      "epoch": 0.00543,
+      "grad_norm": 1.0650490028229627,
+      "learning_rate": 0.0016290000000000002,
+      "loss": 5.1821,
+      "step": 543
+    },
+    {
+      "epoch": 0.00544,
+      "grad_norm": 1.058155288535174,
+      "learning_rate": 0.0016320000000000002,
+      "loss": 5.2106,
+      "step": 544
+    },
+    {
+      "epoch": 0.00545,
+      "grad_norm": 0.7888748411290551,
+      "learning_rate": 0.0016350000000000002,
+      "loss": 5.2093,
+      "step": 545
+    },
+    {
+      "epoch": 0.00546,
+      "grad_norm": 0.8327623405474364,
+      "learning_rate": 0.0016380000000000001,
+      "loss": 5.1821,
+      "step": 546
+    },
+    {
+      "epoch": 0.00547,
+      "grad_norm": 0.9359889846839972,
+      "learning_rate": 0.001641,
+      "loss": 5.1867,
+      "step": 547
+    },
+    {
+      "epoch": 0.00548,
+      "grad_norm": 0.8250520922796094,
+      "learning_rate": 0.001644,
+      "loss": 5.1781,
+      "step": 548
+    },
+    {
+      "epoch": 0.00549,
+      "grad_norm": 0.8346002681162655,
+      "learning_rate": 0.0016470000000000002,
+      "loss": 5.1786,
+      "step": 549
+    },
+    {
+      "epoch": 0.0055,
+      "grad_norm": 0.8307775357404767,
+      "learning_rate": 0.0016500000000000002,
+      "loss": 5.1901,
+      "step": 550
+    },
+    {
+      "epoch": 0.00551,
+      "grad_norm": 0.8868587049067305,
+      "learning_rate": 0.0016530000000000002,
+      "loss": 5.1882,
+      "step": 551
+    },
+    {
+      "epoch": 0.00552,
+      "grad_norm": 1.0021721179116465,
+      "learning_rate": 0.0016560000000000001,
+      "loss": 5.1887,
+      "step": 552
+    },
+    {
+      "epoch": 0.00553,
+      "grad_norm": 1.0435640829012027,
+      "learning_rate": 0.001659,
+      "loss": 5.1828,
+      "step": 553
+    },
+    {
+      "epoch": 0.00554,
+      "grad_norm": 1.091952614784726,
+      "learning_rate": 0.0016620000000000003,
+      "loss": 5.1843,
+      "step": 554
+    },
+    {
+      "epoch": 0.00555,
+      "grad_norm": 0.7530351004511261,
+      "learning_rate": 0.0016650000000000002,
+      "loss": 5.1804,
+      "step": 555
+    },
+    {
+      "epoch": 0.00556,
+      "grad_norm": 0.7840106393058217,
+      "learning_rate": 0.0016680000000000002,
+      "loss": 5.1664,
+      "step": 556
+    },
+    {
+      "epoch": 0.00557,
+      "grad_norm": 0.6999816254444311,
+      "learning_rate": 0.0016710000000000002,
+      "loss": 5.1437,
+      "step": 557
+    },
+    {
+      "epoch": 0.00558,
+      "grad_norm": 0.7845980119871422,
+      "learning_rate": 0.0016740000000000001,
+      "loss": 5.1792,
+      "step": 558
+    },
+    {
+      "epoch": 0.00559,
+      "grad_norm": 1.002237738003299,
+      "learning_rate": 0.001677,
+      "loss": 5.165,
+      "step": 559
+    },
+    {
+      "epoch": 0.0056,
+      "grad_norm": 1.1590360602458978,
+      "learning_rate": 0.0016800000000000003,
+      "loss": 5.15,
+      "step": 560
+    },
+    {
+      "epoch": 0.00561,
+      "grad_norm": 0.9693350319936842,
+      "learning_rate": 0.0016830000000000003,
+      "loss": 5.1673,
+      "step": 561
+    },
+    {
+      "epoch": 0.00562,
+      "grad_norm": 0.9337806686381701,
+      "learning_rate": 0.0016860000000000002,
+      "loss": 5.1656,
+      "step": 562
+    },
+    {
+      "epoch": 0.00563,
+      "grad_norm": 0.7021371152666548,
+      "learning_rate": 0.001689,
+      "loss": 5.1554,
+      "step": 563
+    },
+    {
+      "epoch": 0.00564,
+      "grad_norm": 0.761762323285238,
+      "learning_rate": 0.001692,
+      "loss": 5.1364,
+      "step": 564
+    },
+    {
+      "epoch": 0.00565,
+      "grad_norm": 0.9962272803832337,
+      "learning_rate": 0.001695,
+      "loss": 5.1465,
+      "step": 565
+    },
+    {
+      "epoch": 0.00566,
+      "grad_norm": 0.9270663941212276,
+      "learning_rate": 0.0016979999999999999,
+      "loss": 5.1454,
+      "step": 566
+    },
+    {
+      "epoch": 0.00567,
+      "grad_norm": 1.0296751705367089,
+      "learning_rate": 0.0017009999999999998,
+      "loss": 5.1403,
+      "step": 567
+    },
+    {
+      "epoch": 0.00568,
+      "grad_norm": 1.0921308967550072,
+      "learning_rate": 0.0017039999999999998,
+      "loss": 5.1647,
+      "step": 568
+    },
+    {
+      "epoch": 0.00569,
+      "grad_norm": 0.9680118526613578,
+      "learning_rate": 0.001707,
+      "loss": 5.1376,
+      "step": 569
+    },
+    {
+      "epoch": 0.0057,
+      "grad_norm": 1.1373894554942883,
+      "learning_rate": 0.00171,
+      "loss": 5.1667,
+      "step": 570
+    },
+    {
+      "epoch": 0.00571,
+      "grad_norm": 0.9491793536820188,
+      "learning_rate": 0.001713,
+      "loss": 5.1618,
+      "step": 571
+    },
+    {
+      "epoch": 0.00572,
+      "grad_norm": 1.0938883440367575,
+      "learning_rate": 0.0017159999999999999,
+      "loss": 5.1499,
+      "step": 572
+    },
+    {
+      "epoch": 0.00573,
+      "grad_norm": 0.8953824806877001,
+      "learning_rate": 0.0017189999999999998,
+      "loss": 5.1459,
+      "step": 573
+    },
+    {
+      "epoch": 0.00574,
+      "grad_norm": 0.8777825611555061,
+      "learning_rate": 0.001722,
+      "loss": 5.1467,
+      "step": 574
+    },
+    {
+      "epoch": 0.00575,
+      "grad_norm": 0.9427806830230203,
+      "learning_rate": 0.001725,
+      "loss": 5.132,
+      "step": 575
+    },
+    {
+      "epoch": 0.00576,
+      "grad_norm": 0.8846520959631657,
+      "learning_rate": 0.001728,
+      "loss": 5.1269,
+      "step": 576
+    },
+    {
+      "epoch": 0.00577,
+      "grad_norm": 0.6344552018416748,
+      "learning_rate": 0.001731,
+      "loss": 5.1395,
+      "step": 577
+    },
+    {
+      "epoch": 0.00578,
+      "grad_norm": 0.6771922101340231,
+      "learning_rate": 0.0017339999999999999,
+      "loss": 5.1199,
+      "step": 578
+    },
+    {
+      "epoch": 0.00579,
+      "grad_norm": 0.5381676919488529,
+      "learning_rate": 0.0017369999999999998,
+      "loss": 5.1001,
+      "step": 579
+    },
+    {
+      "epoch": 0.0058,
+      "grad_norm": 0.5416857271346956,
+      "learning_rate": 0.00174,
+      "loss": 5.1179,
+      "step": 580
+    },
+    {
+      "epoch": 0.00581,
+      "grad_norm": 0.4886409554358658,
+      "learning_rate": 0.001743,
+      "loss": 5.1131,
+      "step": 581
+    },
+    {
+      "epoch": 0.00582,
+      "grad_norm": 0.3953796837190132,
+      "learning_rate": 0.001746,
+      "loss": 5.0842,
+      "step": 582
+    },
+    {
+      "epoch": 0.00583,
+      "grad_norm": 0.3816231764982968,
+      "learning_rate": 0.001749,
+      "loss": 5.1076,
+      "step": 583
+    },
+    {
+      "epoch": 0.00584,
+      "grad_norm": 0.39062356918599367,
+      "learning_rate": 0.0017519999999999999,
+      "loss": 5.1003,
+      "step": 584
+    },
+    {
+      "epoch": 0.00585,
+      "grad_norm": 0.3915858898605007,
+      "learning_rate": 0.0017549999999999998,
+      "loss": 5.0837,
+      "step": 585
+    },
+    {
+      "epoch": 0.00586,
+      "grad_norm": 0.4063811781565053,
+      "learning_rate": 0.001758,
+      "loss": 5.0866,
+      "step": 586
+    },
+    {
+      "epoch": 0.00587,
+      "grad_norm": 0.4274699383917251,
+      "learning_rate": 0.001761,
+      "loss": 5.0709,
+      "step": 587
+    },
+    {
+      "epoch": 0.00588,
+      "grad_norm": 0.5008029306674315,
+      "learning_rate": 0.001764,
+      "loss": 5.0767,
+      "step": 588
+    },
+    {
+      "epoch": 0.00589,
+      "grad_norm": 0.7229655060897183,
+      "learning_rate": 0.001767,
+      "loss": 5.084,
+      "step": 589
+    },
+    {
+      "epoch": 0.0059,
+      "grad_norm": 0.9281114393382421,
+      "learning_rate": 0.0017699999999999999,
+      "loss": 5.0779,
+      "step": 590
+    },
+    {
+      "epoch": 0.00591,
+      "grad_norm": 0.9689787780419555,
+      "learning_rate": 0.001773,
+      "loss": 5.0756,
+      "step": 591
+    },
+    {
+      "epoch": 0.00592,
+      "grad_norm": 0.8435580831880815,
+      "learning_rate": 0.001776,
+      "loss": 5.0975,
+      "step": 592
+    },
+    {
+      "epoch": 0.00593,
+      "grad_norm": 0.9986679613531879,
+      "learning_rate": 0.001779,
+      "loss": 5.098,
+      "step": 593
+    },
+    {
+      "epoch": 0.00594,
+      "grad_norm": 1.0996840470415932,
+      "learning_rate": 0.001782,
+      "loss": 5.1014,
+      "step": 594
+    },
+    {
+      "epoch": 0.00595,
+      "grad_norm": 0.9601369241773534,
+      "learning_rate": 0.001785,
+      "loss": 5.0957,
+      "step": 595
+    },
+    {
+      "epoch": 0.00596,
+      "grad_norm": 0.8606352953891906,
+      "learning_rate": 0.0017879999999999999,
+      "loss": 5.0932,
+      "step": 596
+    },
+    {
+      "epoch": 0.00597,
+      "grad_norm": 0.8716680293105395,
+      "learning_rate": 0.001791,
+      "loss": 5.066,
+      "step": 597
+    },
+    {
+      "epoch": 0.00598,
+      "grad_norm": 0.9954805607751368,
+      "learning_rate": 0.001794,
+      "loss": 5.0837,
+      "step": 598
+    },
+    {
+      "epoch": 0.00599,
+      "grad_norm": 1.0139742121604893,
+      "learning_rate": 0.001797,
+      "loss": 5.0892,
+      "step": 599
+    },
+    {
+      "epoch": 0.006,
+      "grad_norm": 0.8994983668637855,
+      "learning_rate": 0.0018,
+      "loss": 5.083,
+      "step": 600
+    },
+    {
+      "epoch": 0.00601,
+      "grad_norm": 1.012817382509115,
+      "learning_rate": 0.001803,
+      "loss": 5.0872,
+      "step": 601
+    },
+    {
+      "epoch": 0.00602,
+      "grad_norm": 0.829792766990954,
+      "learning_rate": 0.0018059999999999999,
+      "loss": 5.0655,
+      "step": 602
+    },
+    {
+      "epoch": 0.00603,
+      "grad_norm": 0.8978641621726422,
+      "learning_rate": 0.001809,
+      "loss": 5.0896,
+      "step": 603
+    },
+    {
+      "epoch": 0.00604,
+      "grad_norm": 1.006526980400353,
+      "learning_rate": 0.001812,
+      "loss": 5.0733,
+      "step": 604
+    },
+    {
+      "epoch": 0.00605,
+      "grad_norm": 0.9664384705752951,
+      "learning_rate": 0.001815,
+      "loss": 5.0828,
+      "step": 605
+    },
+    {
+      "epoch": 0.00606,
+      "grad_norm": 1.0962791606856168,
+      "learning_rate": 0.001818,
+      "loss": 5.0888,
+      "step": 606
+    },
+    {
+      "epoch": 0.00607,
+      "grad_norm": 0.9313257227556363,
+      "learning_rate": 0.001821,
+      "loss": 5.0876,
+      "step": 607
+    },
+    {
+      "epoch": 0.00608,
+      "grad_norm": 0.9207616431206566,
+      "learning_rate": 0.001824,
+      "loss": 5.068,
+      "step": 608
+    },
+    {
+      "epoch": 0.00609,
+      "grad_norm": 0.9552257534317795,
+      "learning_rate": 0.001827,
+      "loss": 5.0751,
+      "step": 609
+    },
+    {
+      "epoch": 0.0061,
+      "grad_norm": 1.1338881088580717,
+      "learning_rate": 0.00183,
+      "loss": 5.062,
+      "step": 610
+    },
+    {
+      "epoch": 0.00611,
+      "grad_norm": 0.907481169345242,
+      "learning_rate": 0.001833,
+      "loss": 5.0576,
+      "step": 611
+    },
+    {
+      "epoch": 0.00612,
+      "grad_norm": 1.04757168088542,
+      "learning_rate": 0.001836,
+      "loss": 5.0686,
+      "step": 612
+    },
+    {
+      "epoch": 0.00613,
+      "grad_norm": 0.9021638869008188,
+      "learning_rate": 0.001839,
+      "loss": 5.0559,
+      "step": 613
+    },
+    {
+      "epoch": 0.00614,
+      "grad_norm": 0.7877209937196055,
+      "learning_rate": 0.001842,
+      "loss": 5.0768,
+      "step": 614
+    },
+    {
+      "epoch": 0.00615,
+      "grad_norm": 0.7456491069500266,
+      "learning_rate": 0.001845,
+      "loss": 5.0572,
+      "step": 615
+    },
+    {
+      "epoch": 0.00616,
+      "grad_norm": 0.7643816652567319,
+      "learning_rate": 0.001848,
+      "loss": 5.043,
+      "step": 616
+    },
+    {
+      "epoch": 0.00617,
+      "grad_norm": 0.8920948602141958,
+      "learning_rate": 0.001851,
+      "loss": 5.0555,
+      "step": 617
+    },
+    {
+      "epoch": 0.00618,
+      "grad_norm": 1.0257178323795717,
+      "learning_rate": 0.001854,
+      "loss": 5.0507,
+      "step": 618
+    },
+    {
+      "epoch": 0.00619,
+      "grad_norm": 0.7092119630036949,
+      "learning_rate": 0.001857,
+      "loss": 5.0429,
+      "step": 619
+    },
+    {
+      "epoch": 0.0062,
+      "grad_norm": 0.6092469798525071,
+      "learning_rate": 0.00186,
+      "loss": 5.0262,
+      "step": 620
+    },
+    {
+      "epoch": 0.00621,
+      "grad_norm": 0.7552706683211996,
+      "learning_rate": 0.001863,
+      "loss": 5.032,
+      "step": 621
+    },
+    {
+      "epoch": 0.00622,
+      "grad_norm": 0.6988650940233329,
+      "learning_rate": 0.001866,
+      "loss": 5.0245,
+      "step": 622
+    },
+    {
+      "epoch": 0.00623,
+      "grad_norm": 0.6062593525325608,
+      "learning_rate": 0.001869,
+      "loss": 5.0091,
+      "step": 623
+    },
+    {
+      "epoch": 0.00624,
+      "grad_norm": 0.555479795278756,
+      "learning_rate": 0.001872,
+      "loss": 5.0161,
+      "step": 624
+    },
+    {
+      "epoch": 0.00625,
+      "grad_norm": 0.6339519418385815,
+      "learning_rate": 0.001875,
+      "loss": 5.0276,
+      "step": 625
+    },
+    {
+      "epoch": 0.00626,
+      "grad_norm": 0.7820017260467441,
+      "learning_rate": 0.0018780000000000001,
+      "loss": 5.0084,
+      "step": 626
+    },
+    {
+      "epoch": 0.00627,
+      "grad_norm": 0.8263278266088275,
+      "learning_rate": 0.001881,
+      "loss": 5.0003,
+      "step": 627
+    },
+    {
+      "epoch": 0.00628,
+      "grad_norm": 0.5717634931424201,
+      "learning_rate": 0.001884,
+      "loss": 5.0204,
+      "step": 628
+    },
+    {
+      "epoch": 0.00629,
+      "grad_norm": 0.5505525631139665,
+      "learning_rate": 0.001887,
+      "loss": 4.9928,
+      "step": 629
+    },
+    {
+      "epoch": 0.0063,
+      "grad_norm": 0.47331922258372455,
+      "learning_rate": 0.00189,
+      "loss": 4.9837,
+      "step": 630
+    },
+    {
+      "epoch": 0.00631,
+      "grad_norm": 0.4173073508747504,
+      "learning_rate": 0.0018930000000000002,
+      "loss": 4.9757,
+      "step": 631
+    },
+    {
+      "epoch": 0.00632,
+      "grad_norm": 0.42808966750972455,
+      "learning_rate": 0.0018960000000000001,
+      "loss": 4.9928,
+      "step": 632
+    },
+    {
+      "epoch": 0.00633,
+      "grad_norm": 0.46750467774391197,
+      "learning_rate": 0.001899,
+      "loss": 4.9879,
+      "step": 633
+    },
+    {
+      "epoch": 0.00634,
+      "grad_norm": 0.5696215613123979,
+      "learning_rate": 0.001902,
+      "loss": 4.9763,
+      "step": 634
+    },
+    {
+      "epoch": 0.00635,
+      "grad_norm": 0.7138673203190975,
+      "learning_rate": 0.001905,
+      "loss": 4.9686,
+      "step": 635
+    },
+    {
+      "epoch": 0.00636,
+      "grad_norm": 0.8836310721952346,
+      "learning_rate": 0.001908,
+      "loss": 4.9828,
+      "step": 636
+    },
+    {
+      "epoch": 0.00637,
+      "grad_norm": 0.9755328249694639,
+      "learning_rate": 0.0019110000000000002,
+      "loss": 5.002,
+      "step": 637
+    },
+    {
+      "epoch": 0.00638,
+      "grad_norm": 1.3272264449165134,
+      "learning_rate": 0.0019140000000000001,
+      "loss": 5.0051,
+      "step": 638
+    },
+    {
+      "epoch": 0.00639,
+      "grad_norm": 0.9140856571338883,
+      "learning_rate": 0.001917,
+      "loss": 4.9726,
+      "step": 639
+    },
+    {
+      "epoch": 0.0064,
+      "grad_norm": 1.0466736994412218,
+      "learning_rate": 0.00192,
+      "loss": 4.9869,
+      "step": 640
+    },
+    {
+      "epoch": 0.00641,
+      "grad_norm": 1.1161814084223103,
+      "learning_rate": 0.001923,
+      "loss": 5.0003,
+      "step": 641
+    },
+    {
+      "epoch": 0.00642,
+      "grad_norm": 1.1352153221992676,
+      "learning_rate": 0.001926,
+      "loss": 4.992,
+      "step": 642
+    },
+    {
+      "epoch": 0.00643,
+      "grad_norm": 1.2268384891507862,
+      "learning_rate": 0.0019290000000000002,
+      "loss": 5.008,
+      "step": 643
+    },
+    {
+      "epoch": 0.00644,
+      "grad_norm": 0.9564122465750423,
+      "learning_rate": 0.0019320000000000001,
+      "loss": 4.9857,
+      "step": 644
+    },
+    {
+      "epoch": 0.00645,
+      "grad_norm": 0.9066234784688915,
+      "learning_rate": 0.001935,
+      "loss": 4.9889,
+      "step": 645
+    },
+    {
+      "epoch": 0.00646,
+      "grad_norm": 0.894776309426942,
+      "learning_rate": 0.001938,
+      "loss": 4.995,
+      "step": 646
+    },
+    {
+      "epoch": 0.00647,
+      "grad_norm": 1.036514260058091,
+      "learning_rate": 0.001941,
+      "loss": 5.0081,
+      "step": 647
+    },
+    {
+      "epoch": 0.00648,
+      "grad_norm": 1.0669688261896326,
+      "learning_rate": 0.0019440000000000002,
+      "loss": 4.9991,
+      "step": 648
+    },
+    {
+      "epoch": 0.00649,
+      "grad_norm": 1.0027602192299327,
+      "learning_rate": 0.0019470000000000002,
+      "loss": 4.9834,
+      "step": 649
+    },
+    {
+      "epoch": 0.0065,
+      "grad_norm": 1.2453243505592877,
+      "learning_rate": 0.0019500000000000001,
+      "loss": 5.0183,
+      "step": 650
+    },
+    {
+      "epoch": 0.00651,
+      "grad_norm": 0.9810465710779535,
+      "learning_rate": 0.001953,
+      "loss": 4.9899,
+      "step": 651
+    },
+    {
+      "epoch": 0.00652,
+      "grad_norm": 1.0529607937914427,
+      "learning_rate": 0.0019560000000000003,
+      "loss": 4.9855,
+      "step": 652
+    },
+    {
+      "epoch": 0.00653,
+      "grad_norm": 0.8367844014470341,
+      "learning_rate": 0.0019590000000000002,
+      "loss": 4.9719,
+      "step": 653
+    },
+    {
+      "epoch": 0.00654,
+      "grad_norm": 0.9556352679930769,
+      "learning_rate": 0.001962,
+      "loss": 4.96,
+      "step": 654
+    },
+    {
+      "epoch": 0.00655,
+      "grad_norm": 1.0712859822115042,
+      "learning_rate": 0.001965,
+      "loss": 4.9811,
+      "step": 655
+    },
+    {
+      "epoch": 0.00656,
+      "grad_norm": 0.8381525542412449,
+      "learning_rate": 0.001968,
+      "loss": 4.9628,
+      "step": 656
+    },
+    {
+      "epoch": 0.00657,
+      "grad_norm": 0.853522104902103,
+      "learning_rate": 0.001971,
+      "loss": 4.9486,
+      "step": 657
+    },
+    {
+      "epoch": 0.00658,
+      "grad_norm": 0.9276507218825019,
+      "learning_rate": 0.001974,
+      "loss": 4.9746,
+      "step": 658
+    },
+    {
+      "epoch": 0.00659,
+      "grad_norm": 1.0041122671115763,
+      "learning_rate": 0.001977,
+      "loss": 4.9552,
+      "step": 659
+    },
+    {
+      "epoch": 0.0066,
+      "grad_norm": 0.925609681342879,
+      "learning_rate": 0.00198,
+      "loss": 4.9516,
+      "step": 660
+    },
+    {
+      "epoch": 0.00661,
+      "grad_norm": 0.9340897694356347,
+      "learning_rate": 0.001983,
+      "loss": 4.9301,
+      "step": 661
+    },
+    {
+      "epoch": 0.00662,
+      "grad_norm": 1.1039924911609775,
+      "learning_rate": 0.0019860000000000004,
+      "loss": 4.9353,
+      "step": 662
+    },
+    {
+      "epoch": 0.00663,
+      "grad_norm": 0.842271096957419,
+      "learning_rate": 0.0019890000000000003,
+      "loss": 4.942,
+      "step": 663
+    },
+    {
+      "epoch": 0.00664,
+      "grad_norm": 0.9255021185692508,
+      "learning_rate": 0.0019920000000000003,
+      "loss": 4.9294,
+      "step": 664
+    },
+    {
+      "epoch": 0.00665,
+      "grad_norm": 0.9624840652606003,
+      "learning_rate": 0.0019950000000000002,
+      "loss": 4.9033,
+      "step": 665
+    },
+    {
+      "epoch": 0.00666,
+      "grad_norm": 0.8706912476713129,
+      "learning_rate": 0.001998,
+      "loss": 4.9275,
+      "step": 666
+    },
+    {
+      "epoch": 0.00667,
+      "grad_norm": 0.9355886153293348,
+      "learning_rate": 0.002001,
+      "loss": 4.9186,
+      "step": 667
+    },
+    {
+      "epoch": 0.00668,
+      "grad_norm": 0.9525697321085718,
+      "learning_rate": 0.002004,
+      "loss": 4.9422,
+      "step": 668
+    },
+    {
+      "epoch": 0.00669,
+      "grad_norm": 0.9357519892866003,
+      "learning_rate": 0.002007,
+      "loss": 4.9423,
+      "step": 669
+    },
+    {
+      "epoch": 0.0067,
+      "grad_norm": 0.8227655724451596,
+      "learning_rate": 0.00201,
+      "loss": 4.9287,
+      "step": 670
+    },
+    {
+      "epoch": 0.00671,
+      "grad_norm": 0.6363703099565691,
+      "learning_rate": 0.002013,
+      "loss": 4.8975,
+      "step": 671
+    },
+    {
+      "epoch": 0.00672,
+      "grad_norm": 0.5774852286623988,
+      "learning_rate": 0.002016,
+      "loss": 4.894,
+      "step": 672
+    },
+    {
+      "epoch": 0.00673,
+      "grad_norm": 0.5472152449551199,
+      "learning_rate": 0.002019,
+      "loss": 4.8881,
+      "step": 673
+    },
+    {
+      "epoch": 0.00674,
+      "grad_norm": 0.5267919584843693,
+      "learning_rate": 0.0020220000000000004,
+      "loss": 4.8857,
+      "step": 674
+    },
+    {
+      "epoch": 0.00675,
+      "grad_norm": 0.4823533534454447,
+      "learning_rate": 0.0020250000000000003,
+      "loss": 4.8922,
+      "step": 675
+    },
+    {
+      "epoch": 0.00676,
+      "grad_norm": 0.5135326218761617,
+      "learning_rate": 0.0020280000000000003,
+      "loss": 4.8739,
+      "step": 676
+    },
+    {
+      "epoch": 0.00677,
+      "grad_norm": 0.5629070673817519,
+      "learning_rate": 0.0020310000000000003,
+      "loss": 4.8753,
+      "step": 677
+    },
+    {
+      "epoch": 0.00678,
+      "grad_norm": 0.6636518437686225,
+      "learning_rate": 0.0020340000000000002,
+      "loss": 4.8844,
+      "step": 678
+    },
+    {
+      "epoch": 0.00679,
+      "grad_norm": 0.7939150938071041,
+      "learning_rate": 0.002037,
+      "loss": 4.8694,
+      "step": 679
+    },
+    {
+      "epoch": 0.0068,
+      "grad_norm": 0.6961050241971515,
+      "learning_rate": 0.00204,
+      "loss": 4.8681,
+      "step": 680
+    },
+    {
+      "epoch": 0.00681,
+      "grad_norm": 0.55281231514776,
+      "learning_rate": 0.002043,
+      "loss": 4.854,
+      "step": 681
+    },
+    {
+      "epoch": 0.00682,
+      "grad_norm": 0.6966375314025457,
+      "learning_rate": 0.002046,
+      "loss": 4.8606,
+      "step": 682
+    },
+    {
+      "epoch": 0.00683,
+      "grad_norm": 0.7919918537287608,
+      "learning_rate": 0.002049,
+      "loss": 4.8437,
+      "step": 683
+    },
+    {
+      "epoch": 0.00684,
+      "grad_norm": 1.014505658611748,
+      "learning_rate": 0.002052,
+      "loss": 4.868,
+      "step": 684
+    },
+    {
+      "epoch": 0.00685,
+      "grad_norm": 1.1979460789898289,
+      "learning_rate": 0.0020550000000000004,
+      "loss": 4.8824,
+      "step": 685
+    },
+    {
+      "epoch": 0.00686,
+      "grad_norm": 1.0209197434565493,
+      "learning_rate": 0.0020580000000000004,
+      "loss": 4.8555,
+      "step": 686
+    },
+    {
+      "epoch": 0.00687,
+      "grad_norm": 0.8666785819902827,
+      "learning_rate": 0.0020610000000000003,
+      "loss": 4.8457,
+      "step": 687
+    },
+    {
+      "epoch": 0.00688,
+      "grad_norm": 1.066277682185938,
+      "learning_rate": 0.002064,
+      "loss": 4.8576,
+      "step": 688
+    },
+    {
+      "epoch": 0.00689,
+      "grad_norm": 0.9564373481813865,
+      "learning_rate": 0.002067,
+      "loss": 4.8511,
+      "step": 689
+    },
+    {
+      "epoch": 0.0069,
+      "grad_norm": 1.0921039434762347,
+      "learning_rate": 0.00207,
+      "loss": 4.854,
+      "step": 690
+    },
+    {
+      "epoch": 0.00691,
+      "grad_norm": 0.9682767243250711,
+      "learning_rate": 0.0020729999999999998,
+      "loss": 4.8265,
+      "step": 691
+    },
+    {
+      "epoch": 0.00692,
+      "grad_norm": 1.0182149348151321,
+      "learning_rate": 0.0020759999999999997,
+      "loss": 4.8523,
+      "step": 692
+    },
+    {
+      "epoch": 0.00693,
+      "grad_norm": 1.1559582715656997,
+      "learning_rate": 0.0020789999999999997,
+      "loss": 4.8626,
+      "step": 693
+    },
+    {
+      "epoch": 0.00694,
+      "grad_norm": 0.8731253854999043,
+      "learning_rate": 0.002082,
+      "loss": 4.8314,
+      "step": 694
+    },
+    {
+      "epoch": 0.00695,
+      "grad_norm": 1.02924772696324,
+      "learning_rate": 0.002085,
+      "loss": 4.8388,
+      "step": 695
+    },
+    {
+      "epoch": 0.00696,
+      "grad_norm": 1.1148665982097032,
+      "learning_rate": 0.002088,
+      "loss": 4.8532,
+      "step": 696
+    },
+    {
+      "epoch": 0.00697,
+      "grad_norm": 1.1306884538795905,
+      "learning_rate": 0.002091,
+      "loss": 4.8556,
+      "step": 697
+    },
+    {
+      "epoch": 0.00698,
+      "grad_norm": 1.267565149046305,
+      "learning_rate": 0.002094,
+      "loss": 4.8474,
+      "step": 698
+    },
+    {
+      "epoch": 0.00699,
+      "grad_norm": 0.9067921371830103,
+      "learning_rate": 0.002097,
+      "loss": 4.8312,
+      "step": 699
+    },
+    {
+      "epoch": 0.007,
+      "grad_norm": 0.8797095491461238,
+      "learning_rate": 0.0021,
+      "loss": 4.8378,
+      "step": 700
+    },
+    {
+      "epoch": 0.00701,
+      "grad_norm": 0.9764684557107473,
+      "learning_rate": 0.002103,
+      "loss": 4.8231,
+      "step": 701
+    },
+    {
+      "epoch": 0.00702,
+      "grad_norm": 0.9975561372265659,
+      "learning_rate": 0.002106,
+      "loss": 4.8269,
+      "step": 702
+    },
+    {
+      "epoch": 0.00703,
+      "grad_norm": 0.8409706105195134,
+      "learning_rate": 0.0021089999999999998,
+      "loss": 4.8046,
+      "step": 703
+    },
+    {
+      "epoch": 0.00704,
+      "grad_norm": 0.8602349489288845,
+      "learning_rate": 0.0021119999999999997,
+      "loss": 4.8112,
+      "step": 704
+    },
+    {
+      "epoch": 0.00705,
+      "grad_norm": 0.810283974291816,
+      "learning_rate": 0.002115,
+      "loss": 4.8189,
+      "step": 705
+    },
+    {
+      "epoch": 0.00706,
+      "grad_norm": 0.7904979262360543,
+      "learning_rate": 0.002118,
+      "loss": 4.8127,
+      "step": 706
+    },
+    {
+      "epoch": 0.00707,
+      "grad_norm": 0.8638007527609218,
+      "learning_rate": 0.002121,
+      "loss": 4.7902,
+      "step": 707
+    },
+    {
+      "epoch": 0.00708,
+      "grad_norm": 0.9274642295068019,
+      "learning_rate": 0.002124,
+      "loss": 4.7754,
+      "step": 708
+    },
+    {
+      "epoch": 0.00709,
+      "grad_norm": 0.9265048264631317,
+      "learning_rate": 0.002127,
+      "loss": 4.8051,
+      "step": 709
+    },
+    {
+      "epoch": 0.0071,
+      "grad_norm": 1.0021235226233682,
+      "learning_rate": 0.00213,
+      "loss": 4.8021,
+      "step": 710
+    },
+    {
+      "epoch": 0.00711,
+      "grad_norm": 0.8616225643918476,
+      "learning_rate": 0.002133,
+      "loss": 4.7687,
+      "step": 711
+    },
+    {
+      "epoch": 0.00712,
+      "grad_norm": 1.1509118831082872,
+      "learning_rate": 0.002136,
+      "loss": 4.8063,
+      "step": 712
+    },
+    {
+      "epoch": 0.00713,
+      "grad_norm": 0.8979386810595619,
+      "learning_rate": 0.002139,
+      "loss": 4.8085,
+      "step": 713
+    },
+    {
+      "epoch": 0.00714,
+      "grad_norm": 0.9672478380991173,
+      "learning_rate": 0.002142,
+      "loss": 4.7999,
+      "step": 714
+    },
+    {
+      "epoch": 0.00715,
+      "grad_norm": 1.0283238664767786,
+      "learning_rate": 0.0021449999999999998,
+      "loss": 4.7746,
+      "step": 715
+    },
+    {
+      "epoch": 0.00716,
+      "grad_norm": 0.8031836486660412,
+      "learning_rate": 0.002148,
+      "loss": 4.7611,
+      "step": 716
+    },
+    {
+      "epoch": 0.00717,
+      "grad_norm": 0.8079202930068127,
+      "learning_rate": 0.002151,
+      "loss": 4.7802,
+      "step": 717
+    },
+    {
+      "epoch": 0.00718,
+      "grad_norm": 0.7625878769693025,
+      "learning_rate": 0.002154,
+      "loss": 4.7607,
+      "step": 718
+    },
+    {
+      "epoch": 0.00719,
+      "grad_norm": 0.8833410128202536,
+      "learning_rate": 0.002157,
+      "loss": 4.7767,
+      "step": 719
+    },
+    {
+      "epoch": 0.0072,
+      "grad_norm": 1.0099718860880083,
+      "learning_rate": 0.00216,
+      "loss": 4.7733,
+      "step": 720
+    },
+    {
+      "epoch": 0.00721,
+      "grad_norm": 1.0037155368349988,
+      "learning_rate": 0.002163,
+      "loss": 4.78,
+      "step": 721
+    },
+    {
+      "epoch": 0.00722,
+      "grad_norm": 0.8678480066188063,
+      "learning_rate": 0.002166,
+      "loss": 4.7474,
+      "step": 722
+    },
+    {
+      "epoch": 0.00723,
+      "grad_norm": 0.9399210002572385,
+      "learning_rate": 0.002169,
+      "loss": 4.7657,
+      "step": 723
+    },
+    {
+      "epoch": 0.00724,
+      "grad_norm": 0.8184829699796181,
+      "learning_rate": 0.002172,
+      "loss": 4.7533,
+      "step": 724
+    },
+    {
+      "epoch": 0.00725,
+      "grad_norm": 0.8676839571587074,
+      "learning_rate": 0.002175,
+      "loss": 4.7513,
+      "step": 725
+    },
+    {
+      "epoch": 0.00726,
+      "grad_norm": 0.9799992988904748,
+      "learning_rate": 0.002178,
+      "loss": 4.7626,
+      "step": 726
+    },
+    {
+      "epoch": 0.00727,
+      "grad_norm": 1.2281779129682024,
+      "learning_rate": 0.0021809999999999998,
+      "loss": 4.7581,
+      "step": 727
+    },
+    {
+      "epoch": 0.00728,
+      "grad_norm": 1.082945747060172,
+      "learning_rate": 0.002184,
+      "loss": 4.7657,
+      "step": 728
+    },
+    {
+      "epoch": 0.00729,
+      "grad_norm": 1.0915510364818644,
+      "learning_rate": 0.002187,
+      "loss": 4.7617,
+      "step": 729
+    },
+    {
+      "epoch": 0.0073,
+      "grad_norm": 1.0738468909531949,
+      "learning_rate": 0.00219,
+      "loss": 4.7676,
+      "step": 730
+    },
+    {
+      "epoch": 0.00731,
+      "grad_norm": 1.0774407965183543,
+      "learning_rate": 0.002193,
+      "loss": 4.7572,
+      "step": 731
+    },
+    {
+      "epoch": 0.00732,
+      "grad_norm": 0.9732910355796593,
+      "learning_rate": 0.002196,
+      "loss": 4.7638,
+      "step": 732
+    },
+    {
+      "epoch": 0.00733,
+      "grad_norm": 1.02567545008427,
+      "learning_rate": 0.002199,
+      "loss": 4.7467,
+      "step": 733
+    },
+    {
+      "epoch": 0.00734,
+      "grad_norm": 0.9058765241181546,
+      "learning_rate": 0.002202,
+      "loss": 4.7686,
+      "step": 734
+    },
+    {
+      "epoch": 0.00735,
+      "grad_norm": 0.9682392169542167,
+      "learning_rate": 0.002205,
+      "loss": 4.7759,
+      "step": 735
+    },
+    {
+      "epoch": 0.00736,
+      "grad_norm": 0.9938811207200824,
+      "learning_rate": 0.002208,
+      "loss": 4.7615,
+      "step": 736
+    },
+    {
+      "epoch": 0.00737,
+      "grad_norm": 1.0964297344539389,
+      "learning_rate": 0.002211,
+      "loss": 4.7347,
+      "step": 737
+    },
+    {
+      "epoch": 0.00738,
+      "grad_norm": 0.8707349769325928,
+      "learning_rate": 0.002214,
+      "loss": 4.7342,
+      "step": 738
+    },
+    {
+      "epoch": 0.00739,
+      "grad_norm": 0.7399818233744658,
+      "learning_rate": 0.0022170000000000002,
+      "loss": 4.717,
+      "step": 739
+    },
+    {
+      "epoch": 0.0074,
+      "grad_norm": 0.9377539124718652,
+      "learning_rate": 0.00222,
+      "loss": 4.7301,
+      "step": 740
+    },
+    {
+      "epoch": 0.00741,
+      "grad_norm": 0.8596400455739317,
+      "learning_rate": 0.002223,
+      "loss": 4.699,
+      "step": 741
+    },
+    {
+      "epoch": 0.00742,
+      "grad_norm": 0.6578432901740889,
+      "learning_rate": 0.002226,
+      "loss": 4.7249,
+      "step": 742
+    },
+    {
+      "epoch": 0.00743,
+      "grad_norm": 0.6175884285032084,
+      "learning_rate": 0.002229,
+      "loss": 4.6843,
+      "step": 743
+    },
+    {
+      "epoch": 0.00744,
+      "grad_norm": 0.6615203369086972,
+      "learning_rate": 0.002232,
+      "loss": 4.6918,
+      "step": 744
+    },
+    {
+      "epoch": 0.00745,
+      "grad_norm": 0.6999197355703424,
+      "learning_rate": 0.002235,
+      "loss": 4.7005,
+      "step": 745
+    },
+    {
+      "epoch": 0.00746,
+      "grad_norm": 0.7056349857734648,
+      "learning_rate": 0.002238,
+      "loss": 4.6964,
+      "step": 746
+    },
+    {
+      "epoch": 0.00747,
+      "grad_norm": 0.6678161149510893,
+      "learning_rate": 0.002241,
+      "loss": 4.6817,
+      "step": 747
+    },
+    {
+      "epoch": 0.00748,
+      "grad_norm": 0.6725287147155753,
+      "learning_rate": 0.002244,
+      "loss": 4.6915,
+      "step": 748
+    },
+    {
+      "epoch": 0.00749,
+      "grad_norm": 0.7368138262221237,
+      "learning_rate": 0.002247,
+      "loss": 4.6725,
+      "step": 749
+    },
+    {
+      "epoch": 0.0075,
+      "grad_norm": 0.7550252977049275,
+      "learning_rate": 0.0022500000000000003,
+      "loss": 4.7124,
+      "step": 750
+    },
+    {
+      "epoch": 0.00751,
+      "grad_norm": 0.6461697616177359,
+      "learning_rate": 0.0022530000000000002,
+      "loss": 4.6948,
+      "step": 751
+    },
+    {
+      "epoch": 0.00752,
+      "grad_norm": 0.6473605328229959,
+      "learning_rate": 0.002256,
+      "loss": 4.648,
+      "step": 752
+    },
+    {
+      "epoch": 0.00753,
+      "grad_norm": 0.8172272904356894,
+      "learning_rate": 0.002259,
+      "loss": 4.6929,
+      "step": 753
+    },
+    {
+      "epoch": 0.00754,
+      "grad_norm": 0.8400684755887758,
+      "learning_rate": 0.002262,
+      "loss": 4.7068,
+      "step": 754
+    },
+    {
+      "epoch": 0.00755,
+      "grad_norm": 0.7078832518794317,
+      "learning_rate": 0.002265,
+      "loss": 4.6656,
+      "step": 755
+    },
+    {
+      "epoch": 0.00756,
+      "grad_norm": 0.6076060117836831,
+      "learning_rate": 0.002268,
+      "loss": 4.6484,
+      "step": 756
+    },
+    {
+      "epoch": 0.00757,
+      "grad_norm": 0.7133093934008413,
+      "learning_rate": 0.002271,
+      "loss": 4.6658,
+      "step": 757
+    },
+    {
+      "epoch": 0.00758,
+      "grad_norm": 0.7661771348142844,
+      "learning_rate": 0.002274,
+      "loss": 4.6521,
+      "step": 758
+    },
+    {
+      "epoch": 0.00759,
+      "grad_norm": 0.9250288948777622,
+      "learning_rate": 0.002277,
+      "loss": 4.6753,
+      "step": 759
+    },
+    {
+      "epoch": 0.0076,
+      "grad_norm": 1.037174236565274,
+      "learning_rate": 0.00228,
+      "loss": 4.669,
+      "step": 760
+    },
+    {
+      "epoch": 0.00761,
+      "grad_norm": 0.9678315157211191,
+      "learning_rate": 0.002283,
+      "loss": 4.6392,
+      "step": 761
+    },
+    {
+      "epoch": 0.00762,
+      "grad_norm": 1.3728001530688312,
+      "learning_rate": 0.0022860000000000003,
+      "loss": 4.6453,
+      "step": 762
+    },
+    {
+      "epoch": 0.00763,
+      "grad_norm": 1.0284727877786697,
+      "learning_rate": 0.0022890000000000002,
+      "loss": 4.6793,
+      "step": 763
+    },
+    {
+      "epoch": 0.00764,
+      "grad_norm": 0.9914794664489192,
+      "learning_rate": 0.002292,
+      "loss": 4.6942,
+      "step": 764
+    },
+    {
+      "epoch": 0.00765,
+      "grad_norm": 0.984322504117537,
+      "learning_rate": 0.002295,
+      "loss": 4.6765,
+      "step": 765
+    },
+    {
+      "epoch": 0.00766,
+      "grad_norm": 0.9320893698991433,
+      "learning_rate": 0.002298,
+      "loss": 4.6792,
+      "step": 766
+    },
+    {
+      "epoch": 0.00767,
+      "grad_norm": 1.0651442494276249,
+      "learning_rate": 0.002301,
+      "loss": 4.6823,
+      "step": 767
+    },
+    {
+      "epoch": 0.00768,
+      "grad_norm": 0.9179111624711317,
+      "learning_rate": 0.002304,
+      "loss": 4.6817,
+      "step": 768
+    },
+    {
+      "epoch": 0.00769,
+      "grad_norm": 1.026483766110404,
+      "learning_rate": 0.002307,
+      "loss": 4.6987,
+      "step": 769
+    },
+    {
+      "epoch": 0.0077,
+      "grad_norm": 1.1653119571960542,
+      "learning_rate": 0.00231,
+      "loss": 4.6621,
+      "step": 770
+    },
+    {
+      "epoch": 0.00771,
+      "grad_norm": 0.8477238808348645,
+      "learning_rate": 0.002313,
+      "loss": 4.689,
+      "step": 771
+    },
+    {
+      "epoch": 0.00772,
+      "grad_norm": 0.6519421622488206,
+      "learning_rate": 0.002316,
+      "loss": 4.6631,
+      "step": 772
+    },
+    {
+      "epoch": 0.00773,
+      "grad_norm": 0.6177861857364649,
+      "learning_rate": 0.0023190000000000003,
+      "loss": 4.6627,
+      "step": 773
+    },
+    {
+      "epoch": 0.00774,
+      "grad_norm": 0.6901665734497584,
+      "learning_rate": 0.0023220000000000003,
+      "loss": 4.6775,
+      "step": 774
+    },
+    {
+      "epoch": 0.00775,
+      "grad_norm": 0.7356087164350898,
+      "learning_rate": 0.0023250000000000002,
+      "loss": 4.6725,
+      "step": 775
+    },
+    {
+      "epoch": 0.00776,
+      "grad_norm": 0.8693432194982287,
+      "learning_rate": 0.002328,
+      "loss": 4.6722,
+      "step": 776
+    },
+    {
+      "epoch": 0.00777,
+      "grad_norm": 1.099570573598906,
+      "learning_rate": 0.002331,
+      "loss": 4.6596,
+      "step": 777
+    },
+    {
+      "epoch": 0.00778,
+      "grad_norm": 1.0694357671416344,
+      "learning_rate": 0.002334,
+      "loss": 4.6725,
+      "step": 778
+    },
+    {
+      "epoch": 0.00779,
+      "grad_norm": 1.0144407661707453,
+      "learning_rate": 0.002337,
+      "loss": 4.6345,
+      "step": 779
+    },
+    {
+      "epoch": 0.0078,
+      "grad_norm": 0.9392788681741788,
+      "learning_rate": 0.00234,
+      "loss": 4.6579,
+      "step": 780
+    },
+    {
+      "epoch": 0.00781,
+      "grad_norm": 0.9039044158767507,
+      "learning_rate": 0.002343,
+      "loss": 4.6528,
+      "step": 781
+    },
+    {
+      "epoch": 0.00782,
+      "grad_norm": 0.9671545635863801,
+      "learning_rate": 0.002346,
+      "loss": 4.6552,
+      "step": 782
+    },
+    {
+      "epoch": 0.00783,
+      "grad_norm": 0.9789758043675277,
+      "learning_rate": 0.002349,
+      "loss": 4.6404,
+      "step": 783
+    },
+    {
+      "epoch": 0.00784,
+      "grad_norm": 0.7674708275760124,
+      "learning_rate": 0.002352,
+      "loss": 4.6449,
+      "step": 784
+    },
+    {
+      "epoch": 0.00785,
+      "grad_norm": 0.6509064102772842,
+      "learning_rate": 0.0023550000000000003,
+      "loss": 4.6391,
+      "step": 785
+    },
+    {
+      "epoch": 0.00786,
+      "grad_norm": 0.6282839162170526,
+      "learning_rate": 0.0023580000000000003,
+      "loss": 4.6356,
+      "step": 786
+    },
+    {
+      "epoch": 0.00787,
+      "grad_norm": 0.6520823986249177,
+      "learning_rate": 0.0023610000000000003,
+      "loss": 4.6384,
+      "step": 787
+    },
+    {
+      "epoch": 0.00788,
+      "grad_norm": 0.6693545084259133,
+      "learning_rate": 0.002364,
+      "loss": 4.6342,
+      "step": 788
+    },
+    {
+      "epoch": 0.00789,
+      "grad_norm": 0.7061838607079715,
+      "learning_rate": 0.002367,
+      "loss": 4.6597,
+      "step": 789
+    },
+    {
+      "epoch": 0.0079,
+      "grad_norm": 0.7001074671969121,
+      "learning_rate": 0.00237,
+      "loss": 4.6333,
+      "step": 790
+    },
+    {
+      "epoch": 0.00791,
+      "grad_norm": 0.6934841557036142,
+      "learning_rate": 0.002373,
+      "loss": 4.5873,
+      "step": 791
+    },
+    {
+      "epoch": 0.00792,
+      "grad_norm": 0.6555126375785874,
+      "learning_rate": 0.002376,
+      "loss": 4.6158,
+      "step": 792
+    },
+    {
+      "epoch": 0.00793,
+      "grad_norm": 0.6198619755446345,
+      "learning_rate": 0.002379,
+      "loss": 4.6082,
+      "step": 793
+    },
+    {
+      "epoch": 0.00794,
+      "grad_norm": 0.5540734864838481,
+      "learning_rate": 0.002382,
+      "loss": 4.615,
+      "step": 794
+    },
+    {
+      "epoch": 0.00795,
+      "grad_norm": 0.5756469670820633,
+      "learning_rate": 0.002385,
+      "loss": 4.5927,
+      "step": 795
+    },
+    {
+      "epoch": 0.00796,
+      "grad_norm": 0.5912602735893169,
+      "learning_rate": 0.0023880000000000004,
+      "loss": 4.5839,
+      "step": 796
+    },
+    {
+      "epoch": 0.00797,
+      "grad_norm": 0.6477479187436139,
+      "learning_rate": 0.0023910000000000003,
+      "loss": 4.628,
+      "step": 797
+    },
+    {
+      "epoch": 0.00798,
+      "grad_norm": 0.6568610896012951,
+      "learning_rate": 0.0023940000000000003,
+      "loss": 4.5975,
+      "step": 798
+    },
+    {
+      "epoch": 0.00799,
+      "grad_norm": 0.8964867071559416,
+      "learning_rate": 0.0023970000000000003,
+      "loss": 4.6327,
+      "step": 799
+    },
+    {
+      "epoch": 0.008,
+      "grad_norm": 1.1130272400690795,
+      "learning_rate": 0.0024000000000000002,
+      "loss": 4.6127,
+      "step": 800
+    },
+    {
+      "epoch": 0.00801,
+      "grad_norm": 0.8936330318178004,
+      "learning_rate": 0.002403,
+      "loss": 4.624,
+      "step": 801
+    },
+    {
+      "epoch": 0.00802,
+      "grad_norm": 1.279259611288336,
+      "learning_rate": 0.002406,
+      "loss": 4.6431,
+      "step": 802
+    },
+    {
+      "epoch": 0.00803,
+      "grad_norm": 0.7588568023085343,
+      "learning_rate": 0.002409,
+      "loss": 4.5967,
+      "step": 803
+    },
+    {
+      "epoch": 0.00804,
+      "grad_norm": 0.9911172738466991,
+      "learning_rate": 0.002412,
+      "loss": 4.6083,
+      "step": 804
+    },
+    {
+      "epoch": 0.00805,
+      "grad_norm": 1.0297855025384943,
+      "learning_rate": 0.002415,
+      "loss": 4.6095,
+      "step": 805
+    },
+    {
+      "epoch": 0.00806,
+      "grad_norm": 0.8656546102408385,
+      "learning_rate": 0.002418,
+      "loss": 4.6231,
+      "step": 806
+    },
+    {
+      "epoch": 0.00807,
+      "grad_norm": 0.7922998530369505,
+      "learning_rate": 0.0024210000000000004,
+      "loss": 4.6144,
+      "step": 807
+    },
+    {
+      "epoch": 0.00808,
+      "grad_norm": 0.7119238411669042,
+      "learning_rate": 0.0024240000000000004,
+      "loss": 4.5933,
+      "step": 808
+    },
+    {
+      "epoch": 0.00809,
+      "grad_norm": 0.6923611180056076,
+      "learning_rate": 0.0024270000000000003,
+      "loss": 4.5855,
+      "step": 809
+    },
+    {
+      "epoch": 0.0081,
+      "grad_norm": 0.759382946117684,
+      "learning_rate": 0.0024300000000000003,
+      "loss": 4.6149,
+      "step": 810
+    },
+    {
+      "epoch": 0.00811,
+      "grad_norm": 0.9629068291697402,
+      "learning_rate": 0.0024330000000000003,
+      "loss": 4.5955,
+      "step": 811
+    },
+    {
+      "epoch": 0.00812,
+      "grad_norm": 0.9897740547487952,
+      "learning_rate": 0.0024360000000000002,
+      "loss": 4.5793,
+      "step": 812
+    },
+    {
+      "epoch": 0.00813,
+      "grad_norm": 1.0343029805479964,
+      "learning_rate": 0.0024389999999999998,
+      "loss": 4.6192,
+      "step": 813
+    },
+    {
+      "epoch": 0.00814,
+      "grad_norm": 1.188458156645309,
+      "learning_rate": 0.0024419999999999997,
+      "loss": 4.6409,
+      "step": 814
+    },
+    {
+      "epoch": 0.00815,
+      "grad_norm": 0.960728927074141,
+      "learning_rate": 0.0024449999999999997,
+      "loss": 4.6319,
+      "step": 815
+    },
+    {
+      "epoch": 0.00816,
+      "grad_norm": 1.0099133377105225,
+      "learning_rate": 0.002448,
+      "loss": 4.6186,
+      "step": 816
+    },
+    {
+      "epoch": 0.00817,
+      "grad_norm": 0.9311887346236664,
+      "learning_rate": 0.002451,
+      "loss": 4.6246,
+      "step": 817
+    },
+    {
+      "epoch": 0.00818,
+      "grad_norm": 1.2609797944179002,
+      "learning_rate": 0.002454,
+      "loss": 4.625,
+      "step": 818
+    },
+    {
+      "epoch": 0.00819,
+      "grad_norm": 1.0447566978787928,
+      "learning_rate": 0.002457,
+      "loss": 4.6401,
+      "step": 819
+    },
+    {
+      "epoch": 0.0082,
+      "grad_norm": 1.1000322233283122,
+      "learning_rate": 0.00246,
+      "loss": 4.6239,
+      "step": 820
+    },
+    {
+      "epoch": 0.00821,
+      "grad_norm": 0.8928816997822231,
+      "learning_rate": 0.002463,
+      "loss": 4.5932,
+      "step": 821
+    },
+    {
+      "epoch": 0.00822,
+      "grad_norm": 0.9105337296182261,
+      "learning_rate": 0.002466,
+      "loss": 4.5996,
+      "step": 822
+    },
+    {
+      "epoch": 0.00823,
+      "grad_norm": 0.6537296194510775,
+      "learning_rate": 0.002469,
+      "loss": 4.5905,
+      "step": 823
+    },
+    {
+      "epoch": 0.00824,
+      "grad_norm": 0.6615102644633621,
+      "learning_rate": 0.002472,
+      "loss": 4.61,
+      "step": 824
+    },
+    {
+      "epoch": 0.00825,
+      "grad_norm": 0.7605760557018463,
+      "learning_rate": 0.0024749999999999998,
+      "loss": 4.6148,
+      "step": 825
+    },
+    {
+      "epoch": 0.00826,
+      "grad_norm": 0.9288808111624368,
+      "learning_rate": 0.0024779999999999997,
+      "loss": 4.5806,
+      "step": 826
+    },
+    {
+      "epoch": 0.00827,
+      "grad_norm": 0.8099205132023622,
+      "learning_rate": 0.002481,
+      "loss": 4.6084,
+      "step": 827
+    },
+    {
+      "epoch": 0.00828,
+      "grad_norm": 0.7444084275861881,
+      "learning_rate": 0.002484,
+      "loss": 4.5965,
+      "step": 828
+    },
+    {
+      "epoch": 0.00829,
+      "grad_norm": 0.6438605280155706,
+      "learning_rate": 0.002487,
+      "loss": 4.5891,
+      "step": 829
+    },
+    {
+      "epoch": 0.0083,
+      "grad_norm": 0.6242869251575957,
+      "learning_rate": 0.00249,
+      "loss": 4.5955,
+      "step": 830
+    },
+    {
+      "epoch": 0.00831,
+      "grad_norm": 0.6705073637500226,
+      "learning_rate": 0.002493,
+      "loss": 4.576,
+      "step": 831
+    },
+    {
+      "epoch": 0.00832,
+      "grad_norm": 0.7603459804613621,
+      "learning_rate": 0.002496,
+      "loss": 4.5759,
+      "step": 832
+    },
+    {
+      "epoch": 0.00833,
+      "grad_norm": 0.7662808437283888,
+      "learning_rate": 0.002499,
+      "loss": 4.5696,
+      "step": 833
+    },
+    {
+      "epoch": 0.00834,
+      "grad_norm": 0.8012380275176963,
+      "learning_rate": 0.002502,
+      "loss": 4.5743,
+      "step": 834
+    },
+    {
+      "epoch": 0.00835,
+      "grad_norm": 1.0075156101089233,
+      "learning_rate": 0.002505,
+      "loss": 4.5783,
+      "step": 835
+    },
+    {
+      "epoch": 0.00836,
+      "grad_norm": 1.0847073328294785,
+      "learning_rate": 0.002508,
+      "loss": 4.6155,
+      "step": 836
+    },
+    {
+      "epoch": 0.00837,
+      "grad_norm": 0.8849400794535106,
+      "learning_rate": 0.0025109999999999998,
+      "loss": 4.5603,
+      "step": 837
+    },
+    {
+      "epoch": 0.00838,
+      "grad_norm": 0.8832989831172253,
+      "learning_rate": 0.0025139999999999997,
+      "loss": 4.5816,
+      "step": 838
+    },
+    {
+      "epoch": 0.00839,
+      "grad_norm": 0.9511730781538094,
+      "learning_rate": 0.002517,
+      "loss": 4.5843,
+      "step": 839
+    },
+    {
+      "epoch": 0.0084,
+      "grad_norm": 1.037817208386755,
+      "learning_rate": 0.00252,
+      "loss": 4.5831,
+      "step": 840
+    },
+    {
+      "epoch": 0.00841,
+      "grad_norm": 0.8472466492390959,
+      "learning_rate": 0.002523,
+      "loss": 4.5624,
+      "step": 841
+    },
+    {
+      "epoch": 0.00842,
+      "grad_norm": 0.9269217528832409,
+      "learning_rate": 0.002526,
+      "loss": 4.5661,
+      "step": 842
+    },
+    {
+      "epoch": 0.00843,
+      "grad_norm": 0.8716247137682855,
+      "learning_rate": 0.002529,
+      "loss": 4.5688,
+      "step": 843
+    },
+    {
+      "epoch": 0.00844,
+      "grad_norm": 0.7416972066179122,
+      "learning_rate": 0.002532,
+      "loss": 4.585,
+      "step": 844
+    },
+    {
+      "epoch": 0.00845,
+      "grad_norm": 0.6177824387251759,
+      "learning_rate": 0.002535,
+      "loss": 4.5442,
+      "step": 845
+    },
+    {
+      "epoch": 0.00846,
+      "grad_norm": 0.5865348849804463,
+      "learning_rate": 0.002538,
+      "loss": 4.5656,
+      "step": 846
+    },
+    {
+      "epoch": 0.00847,
+      "grad_norm": 0.5324615561823309,
+      "learning_rate": 0.002541,
+      "loss": 4.5442,
+      "step": 847
+    },
+    {
+      "epoch": 0.00848,
+      "grad_norm": 0.5568583953268653,
+      "learning_rate": 0.002544,
+      "loss": 4.5407,
+      "step": 848
+    },
+    {
+      "epoch": 0.00849,
+      "grad_norm": 0.600307108588631,
+      "learning_rate": 0.002547,
+      "loss": 4.5371,
+      "step": 849
+    },
+    {
+      "epoch": 0.0085,
+      "grad_norm": 0.5343909127282478,
+      "learning_rate": 0.00255,
+      "loss": 4.5435,
+      "step": 850
+    },
+    {
+      "epoch": 0.00851,
+      "grad_norm": 0.5790732432599173,
+      "learning_rate": 0.002553,
+      "loss": 4.5588,
+      "step": 851
+    },
+    {
+      "epoch": 0.00852,
+      "grad_norm": 0.6406298666409433,
+      "learning_rate": 0.002556,
+      "loss": 4.5503,
+      "step": 852
+    },
+    {
+      "epoch": 0.00853,
+      "grad_norm": 0.6711876835719848,
+      "learning_rate": 0.002559,
+      "loss": 4.5298,
+      "step": 853
+    },
+    {
+      "epoch": 0.00854,
+      "grad_norm": 0.643558144142947,
+      "learning_rate": 0.002562,
+      "loss": 4.5219,
+      "step": 854
+    },
+    {
+      "epoch": 0.00855,
+      "grad_norm": 0.621905866188772,
+      "learning_rate": 0.002565,
+      "loss": 4.5026,
+      "step": 855
+    },
+    {
+      "epoch": 0.00856,
+      "grad_norm": 0.7503391003054042,
+      "learning_rate": 0.002568,
+      "loss": 4.5375,
+      "step": 856
+    },
+    {
+      "epoch": 0.00857,
+      "grad_norm": 0.90802719077466,
+      "learning_rate": 0.002571,
+      "loss": 4.5344,
+      "step": 857
+    },
+    {
+      "epoch": 0.00858,
+      "grad_norm": 0.9157789056738207,
+      "learning_rate": 0.002574,
+      "loss": 4.5308,
+      "step": 858
+    },
+    {
+      "epoch": 0.00859,
+      "grad_norm": 0.8455467899517649,
+      "learning_rate": 0.002577,
+      "loss": 4.5208,
+      "step": 859
+    },
+    {
+      "epoch": 0.0086,
+      "grad_norm": 0.929986585155377,
+      "learning_rate": 0.00258,
+      "loss": 4.5557,
+      "step": 860
+    },
+    {
+      "epoch": 0.00861,
+      "grad_norm": 1.062443351155347,
+      "learning_rate": 0.0025830000000000002,
+      "loss": 4.5815,
+      "step": 861
+    },
+    {
+      "epoch": 0.00862,
+      "grad_norm": 1.0476479971551458,
+      "learning_rate": 0.002586,
+      "loss": 4.5382,
+      "step": 862
+    },
+    {
+      "epoch": 0.00863,
+      "grad_norm": 0.9516272804079478,
+      "learning_rate": 0.002589,
+      "loss": 4.5686,
+      "step": 863
+    },
+    {
+      "epoch": 0.00864,
+      "grad_norm": 1.0775847362135182,
+      "learning_rate": 0.002592,
+      "loss": 4.5222,
+      "step": 864
+    },
+    {
+      "epoch": 0.00865,
+      "grad_norm": 0.902924351244226,
+      "learning_rate": 0.002595,
+      "loss": 4.5357,
+      "step": 865
+    },
+    {
+      "epoch": 0.00866,
+      "grad_norm": 0.889409265471235,
+      "learning_rate": 0.002598,
+      "loss": 4.5623,
+      "step": 866
+    },
+    {
+      "epoch": 0.00867,
+      "grad_norm": 1.1312843257696636,
+      "learning_rate": 0.002601,
+      "loss": 4.5565,
+      "step": 867
+    },
+    {
+      "epoch": 0.00868,
+      "grad_norm": 1.0195609890673947,
+      "learning_rate": 0.002604,
+      "loss": 4.5479,
+      "step": 868
+    },
+    {
+      "epoch": 0.00869,
+      "grad_norm": 0.8378311515658349,
+      "learning_rate": 0.002607,
+      "loss": 4.5492,
+      "step": 869
+    },
+    {
+      "epoch": 0.0087,
+      "grad_norm": 0.93569973985326,
+      "learning_rate": 0.00261,
+      "loss": 4.5413,
+      "step": 870
+    },
+    {
+      "epoch": 0.00871,
+      "grad_norm": 0.9947247250751194,
+      "learning_rate": 0.002613,
+      "loss": 4.5608,
+      "step": 871
+    },
+    {
+      "epoch": 0.00872,
+      "grad_norm": 0.9456334146879876,
+      "learning_rate": 0.002616,
+      "loss": 4.5489,
+      "step": 872
+    },
+    {
+      "epoch": 0.00873,
+      "grad_norm": 0.9088952463307589,
+      "learning_rate": 0.0026190000000000002,
+      "loss": 4.5587,
+      "step": 873
+    },
+    {
+      "epoch": 0.00874,
+      "grad_norm": 0.7636008668853458,
+      "learning_rate": 0.002622,
+      "loss": 4.5702,
+      "step": 874
+    },
+    {
+      "epoch": 0.00875,
+      "grad_norm": 0.8446171091890929,
+      "learning_rate": 0.002625,
+      "loss": 4.5191,
+      "step": 875
+    },
+    {
+      "epoch": 0.00876,
+      "grad_norm": 0.8731048122579586,
+      "learning_rate": 0.002628,
+      "loss": 4.551,
+      "step": 876
+    },
+    {
+      "epoch": 0.00877,
+      "grad_norm": 0.9444127454444267,
+      "learning_rate": 0.002631,
+      "loss": 4.5701,
+      "step": 877
+    },
+    {
+      "epoch": 0.00878,
+      "grad_norm": 0.9863621265162925,
+      "learning_rate": 0.002634,
+      "loss": 4.5108,
+      "step": 878
+    },
+    {
+      "epoch": 0.00879,
+      "grad_norm": 0.7746522972030878,
+      "learning_rate": 0.002637,
+      "loss": 4.5293,
+      "step": 879
+    },
+    {
+      "epoch": 0.0088,
+      "grad_norm": 0.7150869014843059,
+      "learning_rate": 0.00264,
+      "loss": 4.5368,
+      "step": 880
+    },
+    {
+      "epoch": 0.00881,
+      "grad_norm": 0.866142425879982,
+      "learning_rate": 0.002643,
+      "loss": 4.5448,
+      "step": 881
+    },
+    {
+      "epoch": 0.00882,
+      "grad_norm": 1.0565004714930601,
+      "learning_rate": 0.002646,
+      "loss": 4.5522,
+      "step": 882
+    },
+    {
+      "epoch": 0.00883,
+      "grad_norm": 0.88323512650563,
+      "learning_rate": 0.002649,
+      "loss": 4.5382,
+      "step": 883
+    },
+    {
+      "epoch": 0.00884,
+      "grad_norm": 0.8377440854137777,
+      "learning_rate": 0.0026520000000000003,
+      "loss": 4.5227,
+      "step": 884
+    },
+    {
+      "epoch": 0.00885,
+      "grad_norm": 0.8587569363150891,
+      "learning_rate": 0.0026550000000000002,
+      "loss": 4.5189,
+      "step": 885
+    },
+    {
+      "epoch": 0.00886,
+      "grad_norm": 0.9455034420832737,
+      "learning_rate": 0.002658,
+      "loss": 4.5539,
+      "step": 886
+    },
+    {
+      "epoch": 0.00887,
+      "grad_norm": 0.8258368480900744,
+      "learning_rate": 0.002661,
+      "loss": 4.536,
+      "step": 887
+    },
+    {
+      "epoch": 0.00888,
+      "grad_norm": 0.927023832062946,
+      "learning_rate": 0.002664,
+      "loss": 4.5392,
+      "step": 888
+    },
+    {
+      "epoch": 0.00889,
+      "grad_norm": 0.9905400773231482,
+      "learning_rate": 0.002667,
+      "loss": 4.5077,
+      "step": 889
+    },
+    {
+      "epoch": 0.0089,
+      "grad_norm": 1.0181625806478707,
+      "learning_rate": 0.00267,
+      "loss": 4.551,
+      "step": 890
+    },
+    {
+      "epoch": 0.00891,
+      "grad_norm": 1.0618776306697646,
+      "learning_rate": 0.002673,
+      "loss": 4.5446,
+      "step": 891
+    },
+    {
+      "epoch": 0.00892,
+      "grad_norm": 0.9464629097549706,
+      "learning_rate": 0.002676,
+      "loss": 4.5493,
+      "step": 892
+    },
+    {
+      "epoch": 0.00893,
+      "grad_norm": 1.02959831042168,
+      "learning_rate": 0.002679,
+      "loss": 4.5321,
+      "step": 893
+    },
+    {
+      "epoch": 0.00894,
+      "grad_norm": 0.8717588229222071,
+      "learning_rate": 0.002682,
+      "loss": 4.5126,
+      "step": 894
+    },
+    {
+      "epoch": 0.00895,
+      "grad_norm": 0.7766302283006379,
+      "learning_rate": 0.0026850000000000003,
+      "loss": 4.5191,
+      "step": 895
+    },
+    {
+      "epoch": 0.00896,
+      "grad_norm": 0.7210349653989065,
+      "learning_rate": 0.0026880000000000003,
+      "loss": 4.5061,
+      "step": 896
+    },
+    {
+      "epoch": 0.00897,
+      "grad_norm": 0.7840425119747009,
+      "learning_rate": 0.0026910000000000002,
+      "loss": 4.5249,
+      "step": 897
+    },
+    {
+      "epoch": 0.00898,
+      "grad_norm": 0.7907811575022647,
+      "learning_rate": 0.002694,
+      "loss": 4.518,
+      "step": 898
+    },
+    {
+      "epoch": 0.00899,
+      "grad_norm": 0.7866169386193732,
+      "learning_rate": 0.002697,
+      "loss": 4.5195,
+      "step": 899
+    },
+    {
+      "epoch": 0.009,
+      "grad_norm": 0.7303478296484299,
+      "learning_rate": 0.0027,
+      "loss": 4.5449,
+      "step": 900
+    },
+    {
+      "epoch": 0.00901,
+      "grad_norm": 0.6444049592088867,
+      "learning_rate": 0.002703,
+      "loss": 4.4943,
+      "step": 901
+    },
+    {
+      "epoch": 0.00902,
+      "grad_norm": 0.5222361304775145,
+      "learning_rate": 0.002706,
+      "loss": 4.498,
+      "step": 902
+    },
+    {
+      "epoch": 0.00903,
+      "grad_norm": 0.6152635474063327,
+      "learning_rate": 0.002709,
+      "loss": 4.4967,
+      "step": 903
+    },
+    {
+      "epoch": 0.00904,
+      "grad_norm": 0.6931791624762859,
+      "learning_rate": 0.002712,
+      "loss": 4.4818,
+      "step": 904
+    },
+    {
+      "epoch": 0.00905,
+      "grad_norm": 0.6467753244335184,
+      "learning_rate": 0.002715,
+      "loss": 4.4929,
+      "step": 905
+    },
+    {
+      "epoch": 0.00906,
+      "grad_norm": 0.6638188976320862,
+      "learning_rate": 0.002718,
+      "loss": 4.5049,
+      "step": 906
+    },
+    {
+      "epoch": 0.00907,
+      "grad_norm": 0.503631484987646,
+      "learning_rate": 0.0027210000000000003,
+      "loss": 4.5121,
+      "step": 907
+    },
+    {
+      "epoch": 0.00908,
+      "grad_norm": 0.4745734069368428,
+      "learning_rate": 0.0027240000000000003,
+      "loss": 4.481,
+      "step": 908
+    },
+    {
+      "epoch": 0.00909,
+      "grad_norm": 0.5648420216556126,
+      "learning_rate": 0.0027270000000000003,
+      "loss": 4.5013,
+      "step": 909
+    },
+    {
+      "epoch": 0.0091,
+      "grad_norm": 0.5471966544704259,
+      "learning_rate": 0.0027300000000000002,
+      "loss": 4.5027,
+      "step": 910
+    },
+    {
+      "epoch": 0.00911,
+      "grad_norm": 0.6580432739675335,
+      "learning_rate": 0.002733,
+      "loss": 4.4619,
+      "step": 911
+    },
+    {
+      "epoch": 0.00912,
+      "grad_norm": 0.652804172410743,
+      "learning_rate": 0.002736,
+      "loss": 4.4678,
+      "step": 912
+    },
+    {
+      "epoch": 0.00913,
+      "grad_norm": 0.6108886726833864,
+      "learning_rate": 0.002739,
+      "loss": 4.4601,
+      "step": 913
+    },
+    {
+      "epoch": 0.00914,
+      "grad_norm": 0.5246452629450868,
+      "learning_rate": 0.002742,
+      "loss": 4.4865,
+      "step": 914
+    },
+    {
+      "epoch": 0.00915,
+      "grad_norm": 0.6046562570851716,
+      "learning_rate": 0.002745,
+      "loss": 4.4501,
+      "step": 915
+    },
+    {
+      "epoch": 0.00916,
+      "grad_norm": 0.6470672223067736,
+      "learning_rate": 0.002748,
+      "loss": 4.4304,
+      "step": 916
+    },
+    {
+      "epoch": 0.00917,
+      "grad_norm": 0.6458845430146066,
+      "learning_rate": 0.002751,
+      "loss": 4.4431,
+      "step": 917
+    },
+    {
+      "epoch": 0.00918,
+      "grad_norm": 0.6107710101855935,
+      "learning_rate": 0.0027540000000000004,
+      "loss": 4.4622,
+      "step": 918
+    },
+    {
+      "epoch": 0.00919,
+      "grad_norm": 0.6056157888916294,
+      "learning_rate": 0.0027570000000000003,
+      "loss": 4.4436,
+      "step": 919
+    },
+    {
+      "epoch": 0.0092,
+      "grad_norm": 0.6858065732895877,
+      "learning_rate": 0.0027600000000000003,
+      "loss": 4.462,
+      "step": 920
+    },
+    {
+      "epoch": 0.00921,
+      "grad_norm": 0.9391926745722488,
+      "learning_rate": 0.0027630000000000003,
+      "loss": 4.4556,
+      "step": 921
+    },
+    {
+      "epoch": 0.00922,
+      "grad_norm": 1.1348542218598812,
+      "learning_rate": 0.0027660000000000002,
+      "loss": 4.5063,
+      "step": 922
+    },
+    {
+      "epoch": 0.00923,
+      "grad_norm": 1.0421503736233508,
+      "learning_rate": 0.002769,
+      "loss": 4.4713,
+      "step": 923
+    },
+    {
+      "epoch": 0.00924,
+      "grad_norm": 1.0588478925879097,
+      "learning_rate": 0.002772,
+      "loss": 4.4959,
+      "step": 924
+    },
+    {
+      "epoch": 0.00925,
+      "grad_norm": 0.9781832929515508,
+      "learning_rate": 0.002775,
+      "loss": 4.4806,
+      "step": 925
+    },
+    {
+      "epoch": 0.00926,
+      "grad_norm": 0.7776497655560727,
+      "learning_rate": 0.002778,
+      "loss": 4.488,
+      "step": 926
+    },
+    {
+      "epoch": 0.00927,
+      "grad_norm": 0.7034519817521436,
+      "learning_rate": 0.002781,
+      "loss": 4.4448,
+      "step": 927
+    },
+    {
+      "epoch": 0.00928,
+      "grad_norm": 0.9270286412247504,
+      "learning_rate": 0.002784,
+      "loss": 4.4965,
+      "step": 928
+    },
+    {
+      "epoch": 0.00929,
+      "grad_norm": 1.0728170784278697,
+      "learning_rate": 0.0027870000000000004,
+      "loss": 4.4869,
+      "step": 929
+    },
+    {
+      "epoch": 0.0093,
+      "grad_norm": 0.9492127377122095,
+      "learning_rate": 0.0027900000000000004,
+      "loss": 4.4732,
+      "step": 930
+    },
+    {
+      "epoch": 0.00931,
+      "grad_norm": 0.9513900088751025,
+      "learning_rate": 0.0027930000000000003,
+      "loss": 4.4833,
+      "step": 931
+    },
+    {
+      "epoch": 0.00932,
+      "grad_norm": 1.4536440294804005,
+      "learning_rate": 0.0027960000000000003,
+      "loss": 4.5168,
+      "step": 932
+    },
+    {
+      "epoch": 0.00933,
+      "grad_norm": 1.132862147568844,
+      "learning_rate": 0.0027990000000000003,
+      "loss": 4.4769,
+      "step": 933
+    },
+    {
+      "epoch": 0.00934,
+      "grad_norm": 0.8513409648274727,
+      "learning_rate": 0.0028020000000000002,
+      "loss": 4.4941,
+      "step": 934
+    },
+    {
+      "epoch": 0.00935,
+      "grad_norm": 0.8677938211616196,
+      "learning_rate": 0.002805,
+      "loss": 4.5057,
+      "step": 935
+    },
+    {
+      "epoch": 0.00936,
+      "grad_norm": 0.8298971693271944,
+      "learning_rate": 0.002808,
+      "loss": 4.5081,
+      "step": 936
+    },
+    {
+      "epoch": 0.00937,
+      "grad_norm": 0.8363829119527492,
+      "learning_rate": 0.002811,
+      "loss": 4.521,
+      "step": 937
+    },
+    {
+      "epoch": 0.00938,
+      "grad_norm": 1.0036953395826609,
+      "learning_rate": 0.002814,
+      "loss": 4.5006,
+      "step": 938
+    },
+    {
+      "epoch": 0.00939,
+      "grad_norm": 1.0054078613176451,
+      "learning_rate": 0.002817,
+      "loss": 4.5073,
+      "step": 939
+    },
+    {
+      "epoch": 0.0094,
+      "grad_norm": 0.8667374621688471,
+      "learning_rate": 0.00282,
+      "loss": 4.4958,
+      "step": 940
+    },
+    {
+      "epoch": 0.00941,
+      "grad_norm": 0.9205808599892458,
+      "learning_rate": 0.002823,
+      "loss": 4.4733,
+      "step": 941
+    },
+    {
+      "epoch": 0.00942,
+      "grad_norm": 0.9778408651584425,
+      "learning_rate": 0.002826,
+      "loss": 4.5008,
+      "step": 942
+    },
+    {
+      "epoch": 0.00943,
+      "grad_norm": 1.082550194860624,
+      "learning_rate": 0.002829,
+      "loss": 4.4958,
+      "step": 943
+    },
+    {
+      "epoch": 0.00944,
+      "grad_norm": 1.355725245571646,
+      "learning_rate": 0.002832,
+      "loss": 4.5179,
+      "step": 944
+    },
+    {
+      "epoch": 0.00945,
+      "grad_norm": 0.8345488861943434,
+      "learning_rate": 0.002835,
+      "loss": 4.5015,
+      "step": 945
+    },
+    {
+      "epoch": 0.00946,
+      "grad_norm": 1.0009931674850154,
+      "learning_rate": 0.002838,
+      "loss": 4.5193,
+      "step": 946
+    },
+    {
+      "epoch": 0.00947,
+      "grad_norm": 1.0176650405493206,
+      "learning_rate": 0.0028409999999999998,
+      "loss": 4.4912,
+      "step": 947
+    },
+    {
+      "epoch": 0.00948,
+      "grad_norm": 0.836896216552642,
+      "learning_rate": 0.0028439999999999997,
+      "loss": 4.472,
+      "step": 948
+    },
+    {
+      "epoch": 0.00949,
+      "grad_norm": 0.6881971993105954,
+      "learning_rate": 0.002847,
+      "loss": 4.5057,
+      "step": 949
+    },
+    {
+      "epoch": 0.0095,
+      "grad_norm": 0.8926787293989068,
+      "learning_rate": 0.00285,
+      "loss": 4.5027,
+      "step": 950
+    },
+    {
+      "epoch": 0.00951,
+      "grad_norm": 1.0437772730376889,
+      "learning_rate": 0.002853,
+      "loss": 4.4861,
+      "step": 951
+    },
+    {
+      "epoch": 0.00952,
+      "grad_norm": 0.8745743913439339,
+      "learning_rate": 0.002856,
+      "loss": 4.5041,
+      "step": 952
+    },
+    {
+      "epoch": 0.00953,
+      "grad_norm": 0.7847706213592531,
+      "learning_rate": 0.002859,
+      "loss": 4.4446,
+      "step": 953
+    },
+    {
+      "epoch": 0.00954,
+      "grad_norm": 0.6236105730880978,
+      "learning_rate": 0.002862,
+      "loss": 4.4945,
+      "step": 954
+    },
+    {
+      "epoch": 0.00955,
+      "grad_norm": 0.5696186051972435,
+      "learning_rate": 0.002865,
+      "loss": 4.475,
+      "step": 955
+    },
+    {
+      "epoch": 0.00956,
+      "grad_norm": 0.5459272314199634,
+      "learning_rate": 0.002868,
+      "loss": 4.4518,
+      "step": 956
+    },
+    {
+      "epoch": 0.00957,
+      "grad_norm": 0.5100325019322003,
+      "learning_rate": 0.002871,
+      "loss": 4.4721,
+      "step": 957
+    },
+    {
+      "epoch": 0.00958,
+      "grad_norm": 0.6617022302690957,
+      "learning_rate": 0.002874,
+      "loss": 4.4567,
+      "step": 958
+    },
+    {
+      "epoch": 0.00959,
+      "grad_norm": 0.7948420381771908,
+      "learning_rate": 0.002877,
+      "loss": 4.4693,
+      "step": 959
+    },
+    {
+      "epoch": 0.0096,
+      "grad_norm": 1.0277128972108451,
+      "learning_rate": 0.0028799999999999997,
+      "loss": 4.4747,
+      "step": 960
+    },
+    {
+      "epoch": 0.00961,
+      "grad_norm": 0.9925275083373442,
+      "learning_rate": 0.002883,
+      "loss": 4.4507,
+      "step": 961
+    },
+    {
+      "epoch": 0.00962,
+      "grad_norm": 1.0177847800658486,
+      "learning_rate": 0.002886,
+      "loss": 4.4861,
+      "step": 962
+    },
+    {
+      "epoch": 0.00963,
+      "grad_norm": 1.0118802628275685,
+      "learning_rate": 0.002889,
+      "loss": 4.4865,
+      "step": 963
+    },
+    {
+      "epoch": 0.00964,
+      "grad_norm": 1.1856958623453784,
+      "learning_rate": 0.002892,
+      "loss": 4.4868,
+      "step": 964
+    },
+    {
+      "epoch": 0.00965,
+      "grad_norm": 0.8341279969334199,
+      "learning_rate": 0.002895,
+      "loss": 4.4524,
+      "step": 965
+    },
+    {
+      "epoch": 0.00966,
+      "grad_norm": 0.6756655743310646,
+      "learning_rate": 0.002898,
+      "loss": 4.4552,
+      "step": 966
+    },
+    {
+      "epoch": 0.00967,
+      "grad_norm": 0.6634929266596646,
+      "learning_rate": 0.002901,
+      "loss": 4.4933,
+      "step": 967
+    },
+    {
+      "epoch": 0.00968,
+      "grad_norm": 0.6850072615364151,
+      "learning_rate": 0.002904,
+      "loss": 4.4564,
+      "step": 968
+    },
+    {
+      "epoch": 0.00969,
+      "grad_norm": 0.6166935294692494,
+      "learning_rate": 0.002907,
+      "loss": 4.4862,
+      "step": 969
+    },
+    {
+      "epoch": 0.0097,
+      "grad_norm": 0.6616720297110108,
+      "learning_rate": 0.00291,
+      "loss": 4.4401,
+      "step": 970
+    },
+    {
+      "epoch": 0.00971,
+      "grad_norm": 0.5463010292381552,
+      "learning_rate": 0.002913,
+      "loss": 4.4415,
+      "step": 971
+    },
+    {
+      "epoch": 0.00972,
+      "grad_norm": 0.5115285821904162,
+      "learning_rate": 0.002916,
+      "loss": 4.4454,
+      "step": 972
+    },
+    {
+      "epoch": 0.00973,
+      "grad_norm": 0.543781975527911,
+      "learning_rate": 0.002919,
+      "loss": 4.4402,
+      "step": 973
+    },
+    {
+      "epoch": 0.00974,
+      "grad_norm": 0.5167263203162235,
+      "learning_rate": 0.002922,
+      "loss": 4.4246,
+      "step": 974
+    },
+    {
+      "epoch": 0.00975,
+      "grad_norm": 0.4676730441900605,
+      "learning_rate": 0.002925,
+      "loss": 4.4434,
+      "step": 975
+    },
+    {
+      "epoch": 0.00976,
+      "grad_norm": 0.4483140513716565,
+      "learning_rate": 0.002928,
+      "loss": 4.4259,
+      "step": 976
+    },
+    {
+      "epoch": 0.00977,
+      "grad_norm": 0.45300675248114197,
+      "learning_rate": 0.002931,
+      "loss": 4.4582,
+      "step": 977
+    },
+    {
+      "epoch": 0.00978,
+      "grad_norm": 0.524704391185419,
+      "learning_rate": 0.002934,
+      "loss": 4.4335,
+      "step": 978
+    },
+    {
+      "epoch": 0.00979,
+      "grad_norm": 0.6643809565846889,
+      "learning_rate": 0.002937,
+      "loss": 4.4266,
+      "step": 979
+    },
+    {
+      "epoch": 0.0098,
+      "grad_norm": 0.9159629342753403,
+      "learning_rate": 0.00294,
+      "loss": 4.433,
+      "step": 980
+    },
+    {
+      "epoch": 0.00981,
+      "grad_norm": 1.0134318026936866,
+      "learning_rate": 0.002943,
+      "loss": 4.4516,
+      "step": 981
+    },
+    {
+      "epoch": 0.00982,
+      "grad_norm": 0.7532241089289973,
+      "learning_rate": 0.002946,
+      "loss": 4.4387,
+      "step": 982
+    },
+    {
+      "epoch": 0.00983,
+      "grad_norm": 0.7539130672753217,
+      "learning_rate": 0.0029490000000000002,
+      "loss": 4.4372,
+      "step": 983
+    },
+    {
+      "epoch": 0.00984,
+      "grad_norm": 0.7045823796689694,
+      "learning_rate": 0.002952,
+      "loss": 4.4213,
+      "step": 984
+    },
+    {
+      "epoch": 0.00985,
+      "grad_norm": 0.6478785171714704,
+      "learning_rate": 0.002955,
+      "loss": 4.444,
+      "step": 985
+    },
+    {
+      "epoch": 0.00986,
+      "grad_norm": 0.5640139479823427,
+      "learning_rate": 0.002958,
+      "loss": 4.3997,
+      "step": 986
+    },
+    {
+      "epoch": 0.00987,
+      "grad_norm": 0.6145958247621988,
+      "learning_rate": 0.002961,
+      "loss": 4.4141,
+      "step": 987
+    },
+    {
+      "epoch": 0.00988,
+      "grad_norm": 0.7121366087401472,
+      "learning_rate": 0.002964,
+      "loss": 4.4512,
+      "step": 988
+    },
+    {
+      "epoch": 0.00989,
+      "grad_norm": 0.7474063595618389,
+      "learning_rate": 0.002967,
+      "loss": 4.3897,
+      "step": 989
+    },
+    {
+      "epoch": 0.0099,
+      "grad_norm": 0.7608901784540225,
+      "learning_rate": 0.00297,
+      "loss": 4.4296,
+      "step": 990
+    },
+    {
+      "epoch": 0.00991,
+      "grad_norm": 0.8136541225519112,
+      "learning_rate": 0.002973,
+      "loss": 4.4314,
+      "step": 991
+    },
+    {
+      "epoch": 0.00992,
+      "grad_norm": 0.7865701920195308,
+      "learning_rate": 0.002976,
+      "loss": 4.4266,
+      "step": 992
+    },
+    {
+      "epoch": 0.00993,
+      "grad_norm": 0.8315737176917932,
+      "learning_rate": 0.002979,
+      "loss": 4.4267,
+      "step": 993
+    },
+    {
+      "epoch": 0.00994,
+      "grad_norm": 0.7821802322270756,
+      "learning_rate": 0.002982,
+      "loss": 4.4281,
+      "step": 994
+    },
+    {
+      "epoch": 0.00995,
+      "grad_norm": 0.8705272973695986,
+      "learning_rate": 0.0029850000000000002,
+      "loss": 4.4331,
+      "step": 995
+    },
+    {
+      "epoch": 0.00996,
+      "grad_norm": 0.9954881536889274,
+      "learning_rate": 0.002988,
+      "loss": 4.4408,
+      "step": 996
+    },
+    {
+      "epoch": 0.00997,
+      "grad_norm": 0.9720370341600497,
+      "learning_rate": 0.002991,
+      "loss": 4.4354,
+      "step": 997
+    },
+    {
+      "epoch": 0.00998,
+      "grad_norm": 0.7522042168889891,
+      "learning_rate": 0.002994,
+      "loss": 4.4331,
+      "step": 998
+    },
+    {
+      "epoch": 0.00999,
+      "grad_norm": 0.9425882614336212,
+      "learning_rate": 0.002997,
+      "loss": 4.4261,
+      "step": 999
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 0.9689827453070083,
+      "learning_rate": 0.003,
+      "loss": 4.458,
+      "step": 1000
+    },
+    {
+      "epoch": 0.01001,
+      "grad_norm": 0.9802085236972514,
+      "learning_rate": 0.003,
+      "loss": 4.4569,
+      "step": 1001
+    },
+    {
+      "epoch": 0.01002,
+      "grad_norm": 0.9742392022619593,
+      "learning_rate": 0.003,
+      "loss": 4.4262,
+      "step": 1002
+    },
+    {
+      "epoch": 0.01003,
+      "grad_norm": 0.9115330327806416,
+      "learning_rate": 0.003,
+      "loss": 4.4513,
+      "step": 1003
+    },
+    {
+      "epoch": 0.01004,
+      "grad_norm": 0.8562148232052564,
+      "learning_rate": 0.003,
+      "loss": 4.4397,
+      "step": 1004
+    },
+    {
+      "epoch": 0.01005,
+      "grad_norm": 0.9105663755998641,
+      "learning_rate": 0.003,
+      "loss": 4.4154,
+      "step": 1005
+    },
+    {
+      "epoch": 0.01006,
+      "grad_norm": 1.018045955439956,
+      "learning_rate": 0.003,
+      "loss": 4.433,
+      "step": 1006
+    },
+    {
+      "epoch": 0.01007,
+      "grad_norm": 0.9357911521230117,
+      "learning_rate": 0.003,
+      "loss": 4.4713,
+      "step": 1007
+    },
+    {
+      "epoch": 0.01008,
+      "grad_norm": 0.9681416399703533,
+      "learning_rate": 0.003,
+      "loss": 4.4846,
+      "step": 1008
+    },
+    {
+      "epoch": 0.01009,
+      "grad_norm": 1.030244827646589,
+      "learning_rate": 0.003,
+      "loss": 4.4619,
+      "step": 1009
+    },
+    {
+      "epoch": 0.0101,
+      "grad_norm": 1.0010362528613534,
+      "learning_rate": 0.003,
+      "loss": 4.4688,
+      "step": 1010
+    },
+    {
+      "epoch": 0.01011,
+      "grad_norm": 1.0284858383133122,
+      "learning_rate": 0.003,
+      "loss": 4.4881,
+      "step": 1011
+    },
+    {
+      "epoch": 0.01012,
+      "grad_norm": 0.9778345429910184,
+      "learning_rate": 0.003,
+      "loss": 4.4674,
+      "step": 1012
+    },
+    {
+      "epoch": 0.01013,
+      "grad_norm": 0.8705611016855861,
+      "learning_rate": 0.003,
+      "loss": 4.4919,
+      "step": 1013
+    },
+    {
+      "epoch": 0.01014,
+      "grad_norm": 0.8083144078788229,
+      "learning_rate": 0.003,
+      "loss": 4.4268,
+      "step": 1014
+    },
+    {
+      "epoch": 0.01015,
+      "grad_norm": 0.7155205086193928,
+      "learning_rate": 0.003,
+      "loss": 4.4657,
+      "step": 1015
+    },
+    {
+      "epoch": 0.01016,
+      "grad_norm": 0.5844948538982189,
+      "learning_rate": 0.003,
+      "loss": 4.466,
+      "step": 1016
+    },
+    {
+      "epoch": 0.01017,
+      "grad_norm": 0.50419422786756,
+      "learning_rate": 0.003,
+      "loss": 4.4019,
+      "step": 1017
+    },
+    {
+      "epoch": 0.01018,
+      "grad_norm": 0.5273528303530755,
+      "learning_rate": 0.003,
+      "loss": 4.4545,
+      "step": 1018
+    },
+    {
+      "epoch": 0.01019,
+      "grad_norm": 0.5329688106940915,
+      "learning_rate": 0.003,
+      "loss": 4.4315,
+      "step": 1019
+    },
+    {
+      "epoch": 0.0102,
+      "grad_norm": 0.6285459843623249,
+      "learning_rate": 0.003,
+      "loss": 4.4292,
+      "step": 1020
+    },
+    {
+      "epoch": 0.01021,
+      "grad_norm": 0.6444525489483212,
+      "learning_rate": 0.003,
+      "loss": 4.412,
+      "step": 1021
+    },
+    {
+      "epoch": 0.01022,
+      "grad_norm": 0.6609826768689684,
+      "learning_rate": 0.003,
+      "loss": 4.4194,
+      "step": 1022
+    },
+    {
+      "epoch": 0.01023,
+      "grad_norm": 0.6479610287689606,
+      "learning_rate": 0.003,
+      "loss": 4.4151,
+      "step": 1023
+    },
+    {
+      "epoch": 0.01024,
+      "grad_norm": 0.7091931447524652,
+      "learning_rate": 0.003,
+      "loss": 4.3798,
+      "step": 1024
+    },
+    {
+      "epoch": 0.01025,
+      "grad_norm": 0.7026391214213478,
+      "learning_rate": 0.003,
+      "loss": 4.4116,
+      "step": 1025
+    },
+    {
+      "epoch": 0.01026,
+      "grad_norm": 0.6731332289892269,
+      "learning_rate": 0.003,
+      "loss": 4.3991,
+      "step": 1026
+    },
+    {
+      "epoch": 0.01027,
+      "grad_norm": 0.6590644472165706,
+      "learning_rate": 0.003,
+      "loss": 4.4177,
+      "step": 1027
+    },
+    {
+      "epoch": 0.01028,
+      "grad_norm": 0.7485101036485022,
+      "learning_rate": 0.003,
+      "loss": 4.4014,
+      "step": 1028
+    },
+    {
+      "epoch": 0.01029,
+      "grad_norm": 0.7198477689690366,
+      "learning_rate": 0.003,
+      "loss": 4.3803,
+      "step": 1029
+    },
+    {
+      "epoch": 0.0103,
+      "grad_norm": 0.5542753635749327,
+      "learning_rate": 0.003,
+      "loss": 4.4023,
+      "step": 1030
+    },
+    {
+      "epoch": 0.01031,
+      "grad_norm": 0.5292390629019561,
+      "learning_rate": 0.003,
+      "loss": 4.4144,
+      "step": 1031
+    },
+    {
+      "epoch": 0.01032,
+      "grad_norm": 0.500926566427153,
+      "learning_rate": 0.003,
+      "loss": 4.3837,
+      "step": 1032
+    },
+    {
+      "epoch": 0.01033,
+      "grad_norm": 0.49056326531128164,
+      "learning_rate": 0.003,
+      "loss": 4.4201,
+      "step": 1033
+    },
+    {
+      "epoch": 0.01034,
+      "grad_norm": 0.5309292795236984,
+      "learning_rate": 0.003,
+      "loss": 4.3865,
+      "step": 1034
+    },
+    {
+      "epoch": 0.01035,
+      "grad_norm": 0.6084296376545847,
+      "learning_rate": 0.003,
+      "loss": 4.3892,
+      "step": 1035
+    },
+    {
+      "epoch": 0.01036,
+      "grad_norm": 0.8615895093148164,
+      "learning_rate": 0.003,
+      "loss": 4.3883,
+      "step": 1036
+    },
+    {
+      "epoch": 0.01037,
+      "grad_norm": 0.8936988458454226,
+      "learning_rate": 0.003,
+      "loss": 4.4336,
+      "step": 1037
+    },
+    {
+      "epoch": 0.01038,
+      "grad_norm": 0.6948017038229403,
+      "learning_rate": 0.003,
+      "loss": 4.4116,
+      "step": 1038
+    },
+    {
+      "epoch": 0.01039,
+      "grad_norm": 0.9114790426144561,
+      "learning_rate": 0.003,
+      "loss": 4.4152,
+      "step": 1039
+    },
+    {
+      "epoch": 0.0104,
+      "grad_norm": 0.8448966818619524,
+      "learning_rate": 0.003,
+      "loss": 4.4288,
+      "step": 1040
+    },
+    {
+      "epoch": 0.01041,
+      "grad_norm": 0.7995140521375168,
+      "learning_rate": 0.003,
+      "loss": 4.404,
+      "step": 1041
+    },
+    {
+      "epoch": 0.01042,
+      "grad_norm": 0.8979223927667839,
+      "learning_rate": 0.003,
+      "loss": 4.4357,
+      "step": 1042
+    },
+    {
+      "epoch": 0.01043,
+      "grad_norm": 0.7233892787514891,
+      "learning_rate": 0.003,
+      "loss": 4.3775,
+      "step": 1043
+    },
+    {
+      "epoch": 0.01044,
+      "grad_norm": 0.782866039824708,
+      "learning_rate": 0.003,
+      "loss": 4.4144,
+      "step": 1044
+    },
+    {
+      "epoch": 0.01045,
+      "grad_norm": 0.841755171402396,
+      "learning_rate": 0.003,
+      "loss": 4.4353,
+      "step": 1045
+    },
+    {
+      "epoch": 0.01046,
+      "grad_norm": 0.780348729633882,
+      "learning_rate": 0.003,
+      "loss": 4.4043,
+      "step": 1046
+    },
+    {
+      "epoch": 0.01047,
+      "grad_norm": 0.8623775519511051,
+      "learning_rate": 0.003,
+      "loss": 4.4135,
+      "step": 1047
+    },
+    {
+      "epoch": 0.01048,
+      "grad_norm": 0.9009461265034386,
+      "learning_rate": 0.003,
+      "loss": 4.4049,
+      "step": 1048
+    },
+    {
+      "epoch": 0.01049,
+      "grad_norm": 0.739794637514069,
+      "learning_rate": 0.003,
+      "loss": 4.3786,
+      "step": 1049
+    },
+    {
+      "epoch": 0.0105,
+      "grad_norm": 0.6259101207085414,
+      "learning_rate": 0.003,
+      "loss": 4.4154,
+      "step": 1050
+    },
+    {
+      "epoch": 0.01051,
+      "grad_norm": 0.6694575857098493,
+      "learning_rate": 0.003,
+      "loss": 4.393,
+      "step": 1051
+    },
+    {
+      "epoch": 0.01052,
+      "grad_norm": 0.7141380180592181,
+      "learning_rate": 0.003,
+      "loss": 4.4052,
+      "step": 1052
+    },
+    {
+      "epoch": 0.01053,
+      "grad_norm": 0.7943106393078887,
+      "learning_rate": 0.003,
+      "loss": 4.4143,
+      "step": 1053
+    },
+    {
+      "epoch": 0.01054,
+      "grad_norm": 0.7478655479661611,
+      "learning_rate": 0.003,
+      "loss": 4.4077,
+      "step": 1054
+    },
+    {
+      "epoch": 0.01055,
+      "grad_norm": 0.707966249364238,
+      "learning_rate": 0.003,
+      "loss": 4.3861,
+      "step": 1055
+    },
+    {
+      "epoch": 0.01056,
+      "grad_norm": 0.6358438963919717,
+      "learning_rate": 0.003,
+      "loss": 4.3823,
+      "step": 1056
+    },
+    {
+      "epoch": 0.01057,
+      "grad_norm": 0.5691063929797074,
+      "learning_rate": 0.003,
+      "loss": 4.3849,
+      "step": 1057
+    },
+    {
+      "epoch": 0.01058,
+      "grad_norm": 0.5109096197008457,
+      "learning_rate": 0.003,
+      "loss": 4.3737,
+      "step": 1058
+    },
+    {
+      "epoch": 0.01059,
+      "grad_norm": 0.419033279294021,
+      "learning_rate": 0.003,
+      "loss": 4.3749,
+      "step": 1059
+    },
+    {
+      "epoch": 0.0106,
+      "grad_norm": 0.4506837031966603,
+      "learning_rate": 0.003,
+      "loss": 4.3654,
+      "step": 1060
+    },
+    {
+      "epoch": 0.01061,
+      "grad_norm": 0.42662194113596513,
+      "learning_rate": 0.003,
+      "loss": 4.361,
+      "step": 1061
+    },
+    {
+      "epoch": 0.01062,
+      "grad_norm": 0.5018523046776621,
+      "learning_rate": 0.003,
+      "loss": 4.3568,
+      "step": 1062
+    },
+    {
+      "epoch": 0.01063,
+      "grad_norm": 0.6580287488917459,
+      "learning_rate": 0.003,
+      "loss": 4.3552,
+      "step": 1063
+    },
+    {
+      "epoch": 0.01064,
+      "grad_norm": 0.831793270479749,
+      "learning_rate": 0.003,
+      "loss": 4.3844,
+      "step": 1064
+    },
+    {
+      "epoch": 0.01065,
+      "grad_norm": 1.0701105543701324,
+      "learning_rate": 0.003,
+      "loss": 4.3903,
+      "step": 1065
+    },
+    {
+      "epoch": 0.01066,
+      "grad_norm": 0.943731953453328,
+      "learning_rate": 0.003,
+      "loss": 4.4118,
+      "step": 1066
+    },
+    {
+      "epoch": 0.01067,
+      "grad_norm": 0.9236025360095688,
+      "learning_rate": 0.003,
+      "loss": 4.4136,
+      "step": 1067
+    },
+    {
+      "epoch": 0.01068,
+      "grad_norm": 1.1282330525862354,
+      "learning_rate": 0.003,
+      "loss": 4.4118,
+      "step": 1068
+    },
+    {
+      "epoch": 0.01069,
+      "grad_norm": 0.840410887973158,
+      "learning_rate": 0.003,
+      "loss": 4.4054,
+      "step": 1069
+    },
+    {
+      "epoch": 0.0107,
+      "grad_norm": 0.733631994312363,
+      "learning_rate": 0.003,
+      "loss": 4.4566,
+      "step": 1070
+    },
+    {
+      "epoch": 0.01071,
+      "grad_norm": 0.7799052277350242,
+      "learning_rate": 0.003,
+      "loss": 4.3774,
+      "step": 1071
+    },
+    {
+      "epoch": 0.01072,
+      "grad_norm": 0.7960071687741883,
+      "learning_rate": 0.003,
+      "loss": 4.4136,
+      "step": 1072
+    },
+    {
+      "epoch": 0.01073,
+      "grad_norm": 0.6975430946737453,
+      "learning_rate": 0.003,
+      "loss": 4.4079,
+      "step": 1073
+    },
+    {
+      "epoch": 0.01074,
+      "grad_norm": 0.6813769264433432,
+      "learning_rate": 0.003,
+      "loss": 4.37,
+      "step": 1074
+    },
+    {
+      "epoch": 0.01075,
+      "grad_norm": 0.5971783233576602,
+      "learning_rate": 0.003,
+      "loss": 4.4063,
+      "step": 1075
+    },
+    {
+      "epoch": 0.01076,
+      "grad_norm": 0.5510713740534053,
+      "learning_rate": 0.003,
+      "loss": 4.3967,
+      "step": 1076
+    },
+    {
+      "epoch": 0.01077,
+      "grad_norm": 0.5595097233894012,
+      "learning_rate": 0.003,
+      "loss": 4.3917,
+      "step": 1077
+    },
+    {
+      "epoch": 0.01078,
+      "grad_norm": 0.6230934184673731,
+      "learning_rate": 0.003,
+      "loss": 4.3491,
+      "step": 1078
+    },
+    {
+      "epoch": 0.01079,
+      "grad_norm": 0.7178707958532615,
+      "learning_rate": 0.003,
+      "loss": 4.3825,
+      "step": 1079
+    },
+    {
+      "epoch": 0.0108,
+      "grad_norm": 0.7484447643520958,
+      "learning_rate": 0.003,
+      "loss": 4.3665,
+      "step": 1080
+    },
+    {
+      "epoch": 0.01081,
+      "grad_norm": 0.780034137982597,
+      "learning_rate": 0.003,
+      "loss": 4.3727,
+      "step": 1081
+    },
+    {
+      "epoch": 0.01082,
+      "grad_norm": 0.7403701975238451,
+      "learning_rate": 0.003,
+      "loss": 4.3894,
+      "step": 1082
+    },
+    {
+      "epoch": 0.01083,
+      "grad_norm": 0.6305537452051372,
+      "learning_rate": 0.003,
+      "loss": 4.3733,
+      "step": 1083
+    },
+    {
+      "epoch": 0.01084,
+      "grad_norm": 0.7435200213630843,
+      "learning_rate": 0.003,
+      "loss": 4.3825,
+      "step": 1084
+    },
+    {
+      "epoch": 0.01085,
+      "grad_norm": 0.7405129376016774,
+      "learning_rate": 0.003,
+      "loss": 4.3735,
+      "step": 1085
+    },
+    {
+      "epoch": 0.01086,
+      "grad_norm": 0.6037526750507048,
+      "learning_rate": 0.003,
+      "loss": 4.3449,
+      "step": 1086
+    },
+    {
+      "epoch": 0.01087,
+      "grad_norm": 0.5349574840042758,
+      "learning_rate": 0.003,
+      "loss": 4.3549,
+      "step": 1087
+    },
+    {
+      "epoch": 0.01088,
+      "grad_norm": 0.5555341529780033,
+      "learning_rate": 0.003,
+      "loss": 4.3917,
+      "step": 1088
+    },
+    {
+      "epoch": 0.01089,
+      "grad_norm": 0.6254527345250118,
+      "learning_rate": 0.003,
+      "loss": 4.3599,
+      "step": 1089
+    },
+    {
+      "epoch": 0.0109,
+      "grad_norm": 0.6828156926551208,
+      "learning_rate": 0.003,
+      "loss": 4.3884,
+      "step": 1090
+    },
+    {
+      "epoch": 0.01091,
+      "grad_norm": 0.5753021180468998,
+      "learning_rate": 0.003,
+      "loss": 4.3549,
+      "step": 1091
+    },
+    {
+      "epoch": 0.01092,
+      "grad_norm": 0.4077341561870379,
+      "learning_rate": 0.003,
+      "loss": 4.3436,
+      "step": 1092
+    },
+    {
+      "epoch": 0.01093,
+      "grad_norm": 0.47615819716416974,
+      "learning_rate": 0.003,
+      "loss": 4.3664,
+      "step": 1093
+    },
+    {
+      "epoch": 0.01094,
+      "grad_norm": 0.5845002574061751,
+      "learning_rate": 0.003,
+      "loss": 4.3553,
+      "step": 1094
+    },
+    {
+      "epoch": 0.01095,
+      "grad_norm": 0.8583788887394415,
+      "learning_rate": 0.003,
+      "loss": 4.3944,
+      "step": 1095
+    },
+    {
+      "epoch": 0.01096,
+      "grad_norm": 1.0479503783625854,
+      "learning_rate": 0.003,
+      "loss": 4.3824,
+      "step": 1096
+    },
+    {
+      "epoch": 0.01097,
+      "grad_norm": 0.9584408455564232,
+      "learning_rate": 0.003,
+      "loss": 4.3695,
+      "step": 1097
+    },
+    {
+      "epoch": 0.01098,
+      "grad_norm": 1.2545731298718157,
+      "learning_rate": 0.003,
+      "loss": 4.4068,
+      "step": 1098
+    },
+    {
+      "epoch": 0.01099,
+      "grad_norm": 0.8305012272413557,
+      "learning_rate": 0.003,
+      "loss": 4.3706,
+      "step": 1099
+    },
+    {
+      "epoch": 0.011,
+      "grad_norm": 0.7311868902259275,
+      "learning_rate": 0.003,
+      "loss": 4.3677,
+      "step": 1100
+    },
+    {
+      "epoch": 0.01101,
+      "grad_norm": 0.6742597336847759,
+      "learning_rate": 0.003,
+      "loss": 4.3752,
+      "step": 1101
+    },
+    {
+      "epoch": 0.01102,
+      "grad_norm": 0.6773795133108548,
+      "learning_rate": 0.003,
+      "loss": 4.3802,
+      "step": 1102
+    },
+    {
+      "epoch": 0.01103,
+      "grad_norm": 0.6414092553075785,
+      "learning_rate": 0.003,
+      "loss": 4.3701,
+      "step": 1103
+    },
+    {
+      "epoch": 0.01104,
+      "grad_norm": 0.6378763268399013,
+      "learning_rate": 0.003,
+      "loss": 4.3589,
+      "step": 1104
+    },
+    {
+      "epoch": 0.01105,
+      "grad_norm": 0.6837675260878959,
+      "learning_rate": 0.003,
+      "loss": 4.3856,
+      "step": 1105
+    },
+    {
+      "epoch": 0.01106,
+      "grad_norm": 0.667950958790211,
+      "learning_rate": 0.003,
+      "loss": 4.3574,
+      "step": 1106
+    },
+    {
+      "epoch": 0.01107,
+      "grad_norm": 0.6082482122734244,
+      "learning_rate": 0.003,
+      "loss": 4.3623,
+      "step": 1107
+    },
+    {
+      "epoch": 0.01108,
+      "grad_norm": 0.5983800302348057,
+      "learning_rate": 0.003,
+      "loss": 4.3313,
+      "step": 1108
+    },
+    {
+      "epoch": 0.01109,
+      "grad_norm": 0.6046679697278923,
+      "learning_rate": 0.003,
+      "loss": 4.3476,
+      "step": 1109
+    },
+    {
+      "epoch": 0.0111,
+      "grad_norm": 0.6353880163110637,
+      "learning_rate": 0.003,
+      "loss": 4.3675,
+      "step": 1110
+    },
+    {
+      "epoch": 0.01111,
+      "grad_norm": 0.7049628214149826,
+      "learning_rate": 0.003,
+      "loss": 4.3473,
+      "step": 1111
+    },
+    {
+      "epoch": 0.01112,
+      "grad_norm": 0.758960503794264,
+      "learning_rate": 0.003,
+      "loss": 4.369,
+      "step": 1112
+    },
+    {
+      "epoch": 0.01113,
+      "grad_norm": 0.6091686772357772,
+      "learning_rate": 0.003,
+      "loss": 4.3335,
+      "step": 1113
+    },
+    {
+      "epoch": 0.01114,
+      "grad_norm": 0.628169460357088,
+      "learning_rate": 0.003,
+      "loss": 4.3462,
+      "step": 1114
+    },
+    {
+      "epoch": 0.01115,
+      "grad_norm": 0.7453432538813878,
+      "learning_rate": 0.003,
+      "loss": 4.3779,
+      "step": 1115
+    },
+    {
+      "epoch": 0.01116,
+      "grad_norm": 0.9959163456029771,
+      "learning_rate": 0.003,
+      "loss": 4.3771,
+      "step": 1116
+    },
+    {
+      "epoch": 0.01117,
+      "grad_norm": 0.9566018042437344,
+      "learning_rate": 0.003,
+      "loss": 4.3915,
+      "step": 1117
+    },
+    {
+      "epoch": 0.01118,
+      "grad_norm": 0.6794219206934775,
+      "learning_rate": 0.003,
+      "loss": 4.372,
+      "step": 1118
+    },
+    {
+      "epoch": 0.01119,
+      "grad_norm": 0.6027310157842346,
+      "learning_rate": 0.003,
+      "loss": 4.375,
+      "step": 1119
+    },
+    {
+      "epoch": 0.0112,
+      "grad_norm": 0.6064414406921254,
+      "learning_rate": 0.003,
+      "loss": 4.3588,
+      "step": 1120
+    },
+    {
+      "epoch": 0.01121,
+      "grad_norm": 0.643465354938861,
+      "learning_rate": 0.003,
+      "loss": 4.3691,
+      "step": 1121
+    },
+    {
+      "epoch": 0.01122,
+      "grad_norm": 0.8439491151148678,
+      "learning_rate": 0.003,
+      "loss": 4.3539,
+      "step": 1122
+    },
+    {
+      "epoch": 0.01123,
+      "grad_norm": 0.857123487934385,
+      "learning_rate": 0.003,
+      "loss": 4.345,
+      "step": 1123
+    },
+    {
+      "epoch": 0.01124,
+      "grad_norm": 0.7412296015988188,
+      "learning_rate": 0.003,
+      "loss": 4.3865,
+      "step": 1124
+    },
+    {
+      "epoch": 0.01125,
+      "grad_norm": 0.8143761816040683,
+      "learning_rate": 0.003,
+      "loss": 4.3583,
+      "step": 1125
+    },
+    {
+      "epoch": 0.01126,
+      "grad_norm": 0.741767094028342,
+      "learning_rate": 0.003,
+      "loss": 4.3618,
+      "step": 1126
+    },
+    {
+      "epoch": 0.01127,
+      "grad_norm": 0.737420855232079,
+      "learning_rate": 0.003,
+      "loss": 4.3948,
+      "step": 1127
+    },
+    {
+      "epoch": 0.01128,
+      "grad_norm": 0.7656500975813709,
+      "learning_rate": 0.003,
+      "loss": 4.372,
+      "step": 1128
+    },
+    {
+      "epoch": 0.01129,
+      "grad_norm": 0.8248659664813461,
+      "learning_rate": 0.003,
+      "loss": 4.3685,
+      "step": 1129
+    },
+    {
+      "epoch": 0.0113,
+      "grad_norm": 0.7756542452356111,
+      "learning_rate": 0.003,
+      "loss": 4.3447,
+      "step": 1130
+    },
+    {
+      "epoch": 0.01131,
+      "grad_norm": 0.7965849423705411,
+      "learning_rate": 0.003,
+      "loss": 4.3615,
+      "step": 1131
+    },
+    {
+      "epoch": 0.01132,
+      "grad_norm": 0.718692737781693,
+      "learning_rate": 0.003,
+      "loss": 4.3824,
+      "step": 1132
+    },
+    {
+      "epoch": 0.01133,
+      "grad_norm": 0.6500905111846983,
+      "learning_rate": 0.003,
+      "loss": 4.34,
+      "step": 1133
+    },
+    {
+      "epoch": 0.01134,
+      "grad_norm": 0.6052834019155894,
+      "learning_rate": 0.003,
+      "loss": 4.3525,
+      "step": 1134
+    },
+    {
+      "epoch": 0.01135,
+      "grad_norm": 0.57728723025885,
+      "learning_rate": 0.003,
+      "loss": 4.3767,
+      "step": 1135
+    },
+    {
+      "epoch": 0.01136,
+      "grad_norm": 0.6622614298653483,
+      "learning_rate": 0.003,
+      "loss": 4.3514,
+      "step": 1136
+    },
+    {
+      "epoch": 0.01137,
+      "grad_norm": 0.6945355696134266,
+      "learning_rate": 0.003,
+      "loss": 4.3784,
+      "step": 1137
+    },
+    {
+      "epoch": 0.01138,
+      "grad_norm": 0.7423434054596434,
+      "learning_rate": 0.003,
+      "loss": 4.3361,
+      "step": 1138
+    },
+    {
+      "epoch": 0.01139,
+      "grad_norm": 0.7678127530736037,
+      "learning_rate": 0.003,
+      "loss": 4.3463,
+      "step": 1139
+    },
+    {
+      "epoch": 0.0114,
+      "grad_norm": 0.7470822118949633,
+      "learning_rate": 0.003,
+      "loss": 4.3322,
+      "step": 1140
+    },
+    {
+      "epoch": 0.01141,
+      "grad_norm": 0.903198095826591,
+      "learning_rate": 0.003,
+      "loss": 4.3608,
+      "step": 1141
+    },
+    {
+      "epoch": 0.01142,
+      "grad_norm": 1.0797547624524322,
+      "learning_rate": 0.003,
+      "loss": 4.3612,
+      "step": 1142
+    },
+    {
+      "epoch": 0.01143,
+      "grad_norm": 0.7877548466868329,
+      "learning_rate": 0.003,
+      "loss": 4.3542,
+      "step": 1143
+    },
+    {
+      "epoch": 0.01144,
+      "grad_norm": 0.8147882944795127,
+      "learning_rate": 0.003,
+      "loss": 4.3253,
+      "step": 1144
+    },
+    {
+      "epoch": 0.01145,
+      "grad_norm": 0.7148112106554612,
+      "learning_rate": 0.003,
+      "loss": 4.3655,
+      "step": 1145
+    },
+    {
+      "epoch": 0.01146,
+      "grad_norm": 0.6580426381015894,
+      "learning_rate": 0.003,
+      "loss": 4.3445,
+      "step": 1146
+    },
+    {
+      "epoch": 0.01147,
+      "grad_norm": 0.7448256416265617,
+      "learning_rate": 0.003,
+      "loss": 4.3367,
+      "step": 1147
+    },
+    {
+      "epoch": 0.01148,
+      "grad_norm": 0.6968832253815664,
+      "learning_rate": 0.003,
+      "loss": 4.3386,
+      "step": 1148
+    },
+    {
+      "epoch": 0.01149,
+      "grad_norm": 0.627190656091196,
+      "learning_rate": 0.003,
+      "loss": 4.3527,
+      "step": 1149
+    },
+    {
+      "epoch": 0.0115,
+      "grad_norm": 0.6739971655272956,
+      "learning_rate": 0.003,
+      "loss": 4.3445,
+      "step": 1150
+    },
+    {
+      "epoch": 0.01151,
+      "grad_norm": 0.7518825421443525,
+      "learning_rate": 0.003,
+      "loss": 4.3445,
+      "step": 1151
+    },
+    {
+      "epoch": 0.01152,
+      "grad_norm": 0.7628403894732813,
+      "learning_rate": 0.003,
+      "loss": 4.3529,
+      "step": 1152
+    },
+    {
+      "epoch": 0.01153,
+      "grad_norm": 0.726109999889872,
+      "learning_rate": 0.003,
+      "loss": 4.3668,
+      "step": 1153
+    },
+    {
+      "epoch": 0.01154,
+      "grad_norm": 0.7747132516080261,
+      "learning_rate": 0.003,
+      "loss": 4.3438,
+      "step": 1154
+    },
+    {
+      "epoch": 0.01155,
+      "grad_norm": 0.8180276502719757,
+      "learning_rate": 0.003,
+      "loss": 4.3369,
+      "step": 1155
+    },
+    {
+      "epoch": 0.01156,
+      "grad_norm": 0.7757994684934312,
+      "learning_rate": 0.003,
+      "loss": 4.3405,
+      "step": 1156
+    },
+    {
+      "epoch": 0.01157,
+      "grad_norm": 0.7233155240278132,
+      "learning_rate": 0.003,
+      "loss": 4.3659,
+      "step": 1157
+    },
+    {
+      "epoch": 0.01158,
+      "grad_norm": 0.6552894912204377,
+      "learning_rate": 0.003,
+      "loss": 4.332,
+      "step": 1158
+    },
+    {
+      "epoch": 0.01159,
+      "grad_norm": 0.7654422010962233,
+      "learning_rate": 0.003,
+      "loss": 4.3316,
+      "step": 1159
+    },
+    {
+      "epoch": 0.0116,
+      "grad_norm": 0.7129248873282286,
+      "learning_rate": 0.003,
+      "loss": 4.3462,
+      "step": 1160
+    },
+    {
+      "epoch": 0.01161,
+      "grad_norm": 0.728340877307682,
+      "learning_rate": 0.003,
+      "loss": 4.3213,
+      "step": 1161
+    },
+    {
+      "epoch": 0.01162,
+      "grad_norm": 0.6672912532139038,
+      "learning_rate": 0.003,
+      "loss": 4.3344,
+      "step": 1162
+    },
+    {
+      "epoch": 0.01163,
+      "grad_norm": 0.6087688372398936,
+      "learning_rate": 0.003,
+      "loss": 4.3512,
+      "step": 1163
+    },
+    {
+      "epoch": 0.01164,
+      "grad_norm": 0.5012921787632111,
+      "learning_rate": 0.003,
+      "loss": 4.3329,
+      "step": 1164
+    },
+    {
+      "epoch": 0.01165,
+      "grad_norm": 0.4972192156782153,
+      "learning_rate": 0.003,
+      "loss": 4.3277,
+      "step": 1165
+    },
+    {
+      "epoch": 0.01166,
+      "grad_norm": 0.48804976467805194,
+      "learning_rate": 0.003,
+      "loss": 4.3262,
+      "step": 1166
+    },
+    {
+      "epoch": 0.01167,
+      "grad_norm": 0.5481538887564317,
+      "learning_rate": 0.003,
+      "loss": 4.2952,
+      "step": 1167
+    },
+    {
+      "epoch": 0.01168,
+      "grad_norm": 0.6090792597366076,
+      "learning_rate": 0.003,
+      "loss": 4.3026,
+      "step": 1168
+    },
+    {
+      "epoch": 0.01169,
+      "grad_norm": 0.653085333922602,
+      "learning_rate": 0.003,
+      "loss": 4.347,
+      "step": 1169
+    },
+    {
+      "epoch": 0.0117,
+      "grad_norm": 0.62436544437129,
+      "learning_rate": 0.003,
+      "loss": 4.3309,
+      "step": 1170
+    },
+    {
+      "epoch": 0.01171,
+      "grad_norm": 0.654802486268078,
+      "learning_rate": 0.003,
+      "loss": 4.3143,
+      "step": 1171
+    },
+    {
+      "epoch": 0.01172,
+      "grad_norm": 0.6246624927886145,
+      "learning_rate": 0.003,
+      "loss": 4.3038,
+      "step": 1172
+    },
+    {
+      "epoch": 0.01173,
+      "grad_norm": 0.6813303457409844,
+      "learning_rate": 0.003,
+      "loss": 4.3382,
+      "step": 1173
+    },
+    {
+      "epoch": 0.01174,
+      "grad_norm": 0.7721136771646261,
+      "learning_rate": 0.003,
+      "loss": 4.3203,
+      "step": 1174
+    },
+    {
+      "epoch": 0.01175,
+      "grad_norm": 0.823782632958531,
+      "learning_rate": 0.003,
+      "loss": 4.3152,
+      "step": 1175
+    },
+    {
+      "epoch": 0.01176,
+      "grad_norm": 0.9763749462159719,
+      "learning_rate": 0.003,
+      "loss": 4.3509,
+      "step": 1176
+    },
+    {
+      "epoch": 0.01177,
+      "grad_norm": 1.0894015492716702,
+      "learning_rate": 0.003,
+      "loss": 4.3444,
+      "step": 1177
+    },
+    {
+      "epoch": 0.01178,
+      "grad_norm": 0.8156537670820067,
+      "learning_rate": 0.003,
+      "loss": 4.3473,
+      "step": 1178
+    },
+    {
+      "epoch": 0.01179,
+      "grad_norm": 0.8529362760715462,
+      "learning_rate": 0.003,
+      "loss": 4.3276,
+      "step": 1179
+    },
+    {
+      "epoch": 0.0118,
+      "grad_norm": 0.857392382910556,
+      "learning_rate": 0.003,
+      "loss": 4.395,
+      "step": 1180
+    },
+    {
+      "epoch": 0.01181,
+      "grad_norm": 0.8128436446210868,
+      "learning_rate": 0.003,
+      "loss": 4.3809,
+      "step": 1181
+    },
+    {
+      "epoch": 0.01182,
+      "grad_norm": 0.911584010746237,
+      "learning_rate": 0.003,
+      "loss": 4.3433,
+      "step": 1182
+    },
+    {
+      "epoch": 0.01183,
+      "grad_norm": 0.8460613119287651,
+      "learning_rate": 0.003,
+      "loss": 4.337,
+      "step": 1183
+    },
+    {
+      "epoch": 0.01184,
+      "grad_norm": 0.7730717809982315,
+      "learning_rate": 0.003,
+      "loss": 4.3753,
+      "step": 1184
+    },
+    {
+      "epoch": 0.01185,
+      "grad_norm": 0.7718355908910234,
+      "learning_rate": 0.003,
+      "loss": 4.3449,
+      "step": 1185
+    },
+    {
+      "epoch": 0.01186,
+      "grad_norm": 0.749856576054878,
+      "learning_rate": 0.003,
+      "loss": 4.3669,
+      "step": 1186
+    },
+    {
+      "epoch": 0.01187,
+      "grad_norm": 0.6703031442863907,
+      "learning_rate": 0.003,
+      "loss": 4.33,
+      "step": 1187
+    },
+    {
+      "epoch": 0.01188,
+      "grad_norm": 0.6597409636847196,
+      "learning_rate": 0.003,
+      "loss": 4.3387,
+      "step": 1188
+    },
+    {
+      "epoch": 0.01189,
+      "grad_norm": 0.7528540145175824,
+      "learning_rate": 0.003,
+      "loss": 4.3431,
+      "step": 1189
+    },
+    {
+      "epoch": 0.0119,
+      "grad_norm": 0.7990197433249405,
+      "learning_rate": 0.003,
+      "loss": 4.3273,
+      "step": 1190
+    },
+    {
+      "epoch": 0.01191,
+      "grad_norm": 0.8012185403960035,
+      "learning_rate": 0.003,
+      "loss": 4.3081,
+      "step": 1191
+    },
+    {
+      "epoch": 0.01192,
+      "grad_norm": 0.7571323847445259,
+      "learning_rate": 0.003,
+      "loss": 4.3381,
+      "step": 1192
+    },
+    {
+      "epoch": 0.01193,
+      "grad_norm": 0.6624809456477357,
+      "learning_rate": 0.003,
+      "loss": 4.3371,
+      "step": 1193
+    },
+    {
+      "epoch": 0.01194,
+      "grad_norm": 0.7303795208901569,
+      "learning_rate": 0.003,
+      "loss": 4.3395,
+      "step": 1194
+    },
+    {
+      "epoch": 0.01195,
+      "grad_norm": 0.7841704264970735,
+      "learning_rate": 0.003,
+      "loss": 4.3211,
+      "step": 1195
+    },
+    {
+      "epoch": 0.01196,
+      "grad_norm": 0.8218420512118195,
+      "learning_rate": 0.003,
+      "loss": 4.3286,
+      "step": 1196
+    },
+    {
+      "epoch": 0.01197,
+      "grad_norm": 0.6346054129298183,
+      "learning_rate": 0.003,
+      "loss": 4.2874,
+      "step": 1197
+    },
+    {
+      "epoch": 0.01198,
+      "grad_norm": 0.648198494061273,
+      "learning_rate": 0.003,
+      "loss": 4.3575,
+      "step": 1198
+    },
+    {
+      "epoch": 0.01199,
+      "grad_norm": 0.719551428234481,
+      "learning_rate": 0.003,
+      "loss": 4.3378,
+      "step": 1199
+    },
+    {
+      "epoch": 0.012,
+      "grad_norm": 0.7355036739818444,
+      "learning_rate": 0.003,
+      "loss": 4.3295,
+      "step": 1200
+    },
+    {
+      "epoch": 0.01201,
+      "grad_norm": 0.9984238489578499,
+      "learning_rate": 0.003,
+      "loss": 4.3657,
+      "step": 1201
+    },
+    {
+      "epoch": 0.01202,
+      "grad_norm": 1.2422658156142912,
+      "learning_rate": 0.003,
+      "loss": 4.3719,
+      "step": 1202
+    },
+    {
+      "epoch": 0.01203,
+      "grad_norm": 0.7461879644958674,
+      "learning_rate": 0.003,
+      "loss": 4.3511,
+      "step": 1203
+    },
+    {
+      "epoch": 0.01204,
+      "grad_norm": 0.8846740490522026,
+      "learning_rate": 0.003,
+      "loss": 4.3624,
+      "step": 1204
+    },
+    {
+      "epoch": 0.01205,
+      "grad_norm": 0.7423867106895455,
+      "learning_rate": 0.003,
+      "loss": 4.3403,
+      "step": 1205
+    },
+    {
+      "epoch": 0.01206,
+      "grad_norm": 0.7502877844645354,
+      "learning_rate": 0.003,
+      "loss": 4.3858,
+      "step": 1206
+    },
+    {
+      "epoch": 0.01207,
+      "grad_norm": 0.8010490453321524,
+      "learning_rate": 0.003,
+      "loss": 4.3053,
+      "step": 1207
+    },
+    {
+      "epoch": 0.01208,
+      "grad_norm": 0.7863992142209323,
+      "learning_rate": 0.003,
+      "loss": 4.3665,
+      "step": 1208
+    },
+    {
+      "epoch": 0.01209,
+      "grad_norm": 0.6936314129825434,
+      "learning_rate": 0.003,
+      "loss": 4.3419,
+      "step": 1209
+    },
+    {
+      "epoch": 0.0121,
+      "grad_norm": 0.5834058626530795,
+      "learning_rate": 0.003,
+      "loss": 4.3425,
+      "step": 1210
+    },
+    {
+      "epoch": 0.01211,
+      "grad_norm": 0.5442454390371272,
+      "learning_rate": 0.003,
+      "loss": 4.3248,
+      "step": 1211
+    },
+    {
+      "epoch": 0.01212,
+      "grad_norm": 0.5640531404426855,
+      "learning_rate": 0.003,
+      "loss": 4.3485,
+      "step": 1212
+    },
+    {
+      "epoch": 0.01213,
+      "grad_norm": 0.4686052961934471,
+      "learning_rate": 0.003,
+      "loss": 4.3204,
+      "step": 1213
+    },
+    {
+      "epoch": 0.01214,
+      "grad_norm": 0.4623933928179833,
+      "learning_rate": 0.003,
+      "loss": 4.3023,
+      "step": 1214
+    },
+    {
+      "epoch": 0.01215,
+      "grad_norm": 0.390314877629531,
+      "learning_rate": 0.003,
+      "loss": 4.3075,
+      "step": 1215
+    },
+    {
+      "epoch": 0.01216,
+      "grad_norm": 0.3573034139707242,
+      "learning_rate": 0.003,
+      "loss": 4.2995,
+      "step": 1216
+    },
+    {
+      "epoch": 0.01217,
+      "grad_norm": 0.38593293619021,
+      "learning_rate": 0.003,
+      "loss": 4.3063,
+      "step": 1217
+    },
+    {
+      "epoch": 0.01218,
+      "grad_norm": 0.40705674810377235,
+      "learning_rate": 0.003,
+      "loss": 4.3006,
+      "step": 1218
+    },
+    {
+      "epoch": 0.01219,
+      "grad_norm": 0.45251062517773044,
+      "learning_rate": 0.003,
+      "loss": 4.3,
+      "step": 1219
+    },
+    {
+      "epoch": 0.0122,
+      "grad_norm": 0.5627372695280709,
+      "learning_rate": 0.003,
+      "loss": 4.2943,
+      "step": 1220
+    },
+    {
+      "epoch": 0.01221,
+      "grad_norm": 0.6302297504421508,
+      "learning_rate": 0.003,
+      "loss": 4.3122,
+      "step": 1221
+    },
+    {
+      "epoch": 0.01222,
+      "grad_norm": 0.695146633094906,
+      "learning_rate": 0.003,
+      "loss": 4.2975,
+      "step": 1222
+    },
+    {
+      "epoch": 0.01223,
+      "grad_norm": 0.6745684022343855,
+      "learning_rate": 0.003,
+      "loss": 4.2965,
+      "step": 1223
+    },
+    {
+      "epoch": 0.01224,
+      "grad_norm": 0.6275537412165774,
+      "learning_rate": 0.003,
+      "loss": 4.2794,
+      "step": 1224
+    },
+    {
+      "epoch": 0.01225,
+      "grad_norm": 0.732873035624088,
+      "learning_rate": 0.003,
+      "loss": 4.3084,
+      "step": 1225
+    },
+    {
+      "epoch": 0.01226,
+      "grad_norm": 0.8536265776058696,
+      "learning_rate": 0.003,
+      "loss": 4.3061,
+      "step": 1226
+    },
+    {
+      "epoch": 0.01227,
+      "grad_norm": 0.7710687106511338,
+      "learning_rate": 0.003,
+      "loss": 4.3371,
+      "step": 1227
+    },
+    {
+      "epoch": 0.01228,
+      "grad_norm": 0.6158196915186589,
+      "learning_rate": 0.003,
+      "loss": 4.3122,
+      "step": 1228
+    },
+    {
+      "epoch": 0.01229,
+      "grad_norm": 0.5647355795097493,
+      "learning_rate": 0.003,
+      "loss": 4.2952,
+      "step": 1229
+    },
+    {
+      "epoch": 0.0123,
+      "grad_norm": 0.5989687141441427,
+      "learning_rate": 0.003,
+      "loss": 4.3304,
+      "step": 1230
+    },
+    {
+      "epoch": 0.01231,
+      "grad_norm": 0.6070784312189734,
+      "learning_rate": 0.003,
+      "loss": 4.2993,
+      "step": 1231
+    },
+    {
+      "epoch": 0.01232,
+      "grad_norm": 0.6098189672837228,
+      "learning_rate": 0.003,
+      "loss": 4.276,
+      "step": 1232
+    },
+    {
+      "epoch": 0.01233,
+      "grad_norm": 0.6187452049200426,
+      "learning_rate": 0.003,
+      "loss": 4.2843,
+      "step": 1233
+    },
+    {
+      "epoch": 0.01234,
+      "grad_norm": 0.6847326322735632,
+      "learning_rate": 0.003,
+      "loss": 4.3016,
+      "step": 1234
+    },
+    {
+      "epoch": 0.01235,
+      "grad_norm": 0.7347950056629514,
+      "learning_rate": 0.003,
+      "loss": 4.2975,
+      "step": 1235
+    },
+    {
+      "epoch": 0.01236,
+      "grad_norm": 0.8404243381213906,
+      "learning_rate": 0.003,
+      "loss": 4.3037,
+      "step": 1236
+    },
+    {
+      "epoch": 0.01237,
+      "grad_norm": 1.1666025530795177,
+      "learning_rate": 0.003,
+      "loss": 4.3338,
+      "step": 1237
+    },
+    {
+      "epoch": 0.01238,
+      "grad_norm": 0.7706517401085584,
+      "learning_rate": 0.003,
+      "loss": 4.3184,
+      "step": 1238
+    },
+    {
+      "epoch": 0.01239,
+      "grad_norm": 0.8892529662707559,
+      "learning_rate": 0.003,
+      "loss": 4.3055,
+      "step": 1239
+    },
+    {
+      "epoch": 0.0124,
+      "grad_norm": 0.9665932126023419,
+      "learning_rate": 0.003,
+      "loss": 4.3004,
+      "step": 1240
+    },
+    {
+      "epoch": 0.01241,
+      "grad_norm": 1.0935280340663984,
+      "learning_rate": 0.003,
+      "loss": 4.3233,
+      "step": 1241
+    },
+    {
+      "epoch": 0.01242,
+      "grad_norm": 1.117726648141726,
+      "learning_rate": 0.003,
+      "loss": 4.3312,
+      "step": 1242
+    },
+    {
+      "epoch": 0.01243,
+      "grad_norm": 1.0553809973617114,
+      "learning_rate": 0.003,
+      "loss": 4.3181,
+      "step": 1243
+    },
+    {
+      "epoch": 0.01244,
+      "grad_norm": 0.9192132297518948,
+      "learning_rate": 0.003,
+      "loss": 4.3267,
+      "step": 1244
+    },
+    {
+      "epoch": 0.01245,
+      "grad_norm": 0.9382654947912115,
+      "learning_rate": 0.003,
+      "loss": 4.3187,
+      "step": 1245
+    },
+    {
+      "epoch": 0.01246,
+      "grad_norm": 1.0892504099540619,
+      "learning_rate": 0.003,
+      "loss": 4.3062,
+      "step": 1246
+    },
+    {
+      "epoch": 0.01247,
+      "grad_norm": 0.8993491326759976,
+      "learning_rate": 0.003,
+      "loss": 4.3511,
+      "step": 1247
+    },
+    {
+      "epoch": 0.01248,
+      "grad_norm": 0.908268119830267,
+      "learning_rate": 0.003,
+      "loss": 4.3379,
+      "step": 1248
+    },
+    {
+      "epoch": 0.01249,
+      "grad_norm": 0.8800131758337482,
+      "learning_rate": 0.003,
+      "loss": 4.3278,
+      "step": 1249
+    },
+    {
+      "epoch": 0.0125,
+      "grad_norm": 0.8337196500160362,
+      "learning_rate": 0.003,
+      "loss": 4.3052,
+      "step": 1250
+    },
+    {
+      "epoch": 0.01251,
+      "grad_norm": 0.9249131658343839,
+      "learning_rate": 0.003,
+      "loss": 4.3424,
+      "step": 1251
+    },
+    {
+      "epoch": 0.01252,
+      "grad_norm": 0.8524650338611843,
+      "learning_rate": 0.003,
+      "loss": 4.3172,
+      "step": 1252
+    },
+    {
+      "epoch": 0.01253,
+      "grad_norm": 0.7163285364378025,
+      "learning_rate": 0.003,
+      "loss": 4.2931,
+      "step": 1253
+    },
+    {
+      "epoch": 0.01254,
+      "grad_norm": 0.6991567907525832,
+      "learning_rate": 0.003,
+      "loss": 4.3341,
+      "step": 1254
+    },
+    {
+      "epoch": 0.01255,
+      "grad_norm": 0.6949992430981787,
+      "learning_rate": 0.003,
+      "loss": 4.2856,
+      "step": 1255
+    },
+    {
+      "epoch": 0.01256,
+      "grad_norm": 0.7100922258125638,
+      "learning_rate": 0.003,
+      "loss": 4.3118,
+      "step": 1256
+    },
+    {
+      "epoch": 0.01257,
+      "grad_norm": 0.6612217096643498,
+      "learning_rate": 0.003,
+      "loss": 4.2941,
+      "step": 1257
+    },
+    {
+      "epoch": 0.01258,
+      "grad_norm": 0.7747579760551054,
+      "learning_rate": 0.003,
+      "loss": 4.3107,
+      "step": 1258
+    },
+    {
+      "epoch": 0.01259,
+      "grad_norm": 1.124107981669201,
+      "learning_rate": 0.003,
+      "loss": 4.3222,
+      "step": 1259
+    },
+    {
+      "epoch": 0.0126,
+      "grad_norm": 0.9528743820284102,
+      "learning_rate": 0.003,
+      "loss": 4.3627,
+      "step": 1260
+    },
+    {
+      "epoch": 0.01261,
+      "grad_norm": 0.842570464638209,
+      "learning_rate": 0.003,
+      "loss": 4.3486,
+      "step": 1261
+    },
+    {
+      "epoch": 0.01262,
+      "grad_norm": 0.8393239407149024,
+      "learning_rate": 0.003,
+      "loss": 4.3397,
+      "step": 1262
+    },
+    {
+      "epoch": 0.01263,
+      "grad_norm": 0.8299261180456607,
+      "learning_rate": 0.003,
+      "loss": 4.3362,
+      "step": 1263
+    },
+    {
+      "epoch": 0.01264,
+      "grad_norm": 0.8141230470068299,
+      "learning_rate": 0.003,
+      "loss": 4.3187,
+      "step": 1264
+    },
+    {
+      "epoch": 0.01265,
+      "grad_norm": 1.0141550421015684,
+      "learning_rate": 0.003,
+      "loss": 4.3395,
+      "step": 1265
+    },
+    {
+      "epoch": 0.01266,
+      "grad_norm": 0.7612600647582426,
+      "learning_rate": 0.003,
+      "loss": 4.3297,
+      "step": 1266
+    },
+    {
+      "epoch": 0.01267,
+      "grad_norm": 0.6187340841665547,
+      "learning_rate": 0.003,
+      "loss": 4.3447,
+      "step": 1267
+    },
+    {
+      "epoch": 0.01268,
+      "grad_norm": 0.5822458314315527,
+      "learning_rate": 0.003,
+      "loss": 4.3275,
+      "step": 1268
+    },
+    {
+      "epoch": 0.01269,
+      "grad_norm": 0.4492593229301203,
+      "learning_rate": 0.003,
+      "loss": 4.2855,
+      "step": 1269
+    },
+    {
+      "epoch": 0.0127,
+      "grad_norm": 0.5016783253235925,
+      "learning_rate": 0.003,
+      "loss": 4.3213,
+      "step": 1270
+    },
+    {
+      "epoch": 0.01271,
+      "grad_norm": 0.44977006545763915,
+      "learning_rate": 0.003,
+      "loss": 4.297,
+      "step": 1271
+    },
+    {
+      "epoch": 0.01272,
+      "grad_norm": 0.43028280089396154,
+      "learning_rate": 0.003,
+      "loss": 4.291,
+      "step": 1272
+    },
+    {
+      "epoch": 0.01273,
+      "grad_norm": 0.4520162301705406,
+      "learning_rate": 0.003,
+      "loss": 4.3158,
+      "step": 1273
+    },
+    {
+      "epoch": 0.01274,
+      "grad_norm": 0.451409910695501,
+      "learning_rate": 0.003,
+      "loss": 4.3063,
+      "step": 1274
+    },
+    {
+      "epoch": 0.01275,
+      "grad_norm": 0.4633201534438662,
+      "learning_rate": 0.003,
+      "loss": 4.2756,
+      "step": 1275
+    },
+    {
+      "epoch": 0.01276,
+      "grad_norm": 0.4818283038515168,
+      "learning_rate": 0.003,
+      "loss": 4.2804,
+      "step": 1276
+    },
+    {
+      "epoch": 0.01277,
+      "grad_norm": 0.5612989347677225,
+      "learning_rate": 0.003,
+      "loss": 4.2895,
+      "step": 1277
+    },
+    {
+      "epoch": 0.01278,
+      "grad_norm": 0.6134565833773333,
+      "learning_rate": 0.003,
+      "loss": 4.2966,
+      "step": 1278
+    },
+    {
+      "epoch": 0.01279,
+      "grad_norm": 0.548830310802857,
+      "learning_rate": 0.003,
+      "loss": 4.2778,
+      "step": 1279
+    },
+    {
+      "epoch": 0.0128,
+      "grad_norm": 0.5192954436192417,
+      "learning_rate": 0.003,
+      "loss": 4.2736,
+      "step": 1280
+    },
+    {
+      "epoch": 0.01281,
+      "grad_norm": 0.7079203023923951,
+      "learning_rate": 0.003,
+      "loss": 4.2645,
+      "step": 1281
+    },
+    {
+      "epoch": 0.01282,
+      "grad_norm": 0.8640412732426497,
+      "learning_rate": 0.003,
+      "loss": 4.3216,
+      "step": 1282
+    },
+    {
+      "epoch": 0.01283,
+      "grad_norm": 0.7685939231737142,
+      "learning_rate": 0.003,
+      "loss": 4.2859,
+      "step": 1283
+    },
+    {
+      "epoch": 0.01284,
+      "grad_norm": 0.6000861053702143,
+      "learning_rate": 0.003,
+      "loss": 4.2792,
+      "step": 1284
+    },
+    {
+      "epoch": 0.01285,
+      "grad_norm": 0.7254306454469097,
+      "learning_rate": 0.003,
+      "loss": 4.2881,
+      "step": 1285
+    },
+    {
+      "epoch": 0.01286,
+      "grad_norm": 0.6733987067909273,
+      "learning_rate": 0.003,
+      "loss": 4.3069,
+      "step": 1286
+    },
+    {
+      "epoch": 0.01287,
+      "grad_norm": 0.6347430576087306,
+      "learning_rate": 0.003,
+      "loss": 4.2706,
+      "step": 1287
+    },
+    {
+      "epoch": 0.01288,
+      "grad_norm": 0.6742039696219508,
+      "learning_rate": 0.003,
+      "loss": 4.2877,
+      "step": 1288
+    },
+    {
+      "epoch": 0.01289,
+      "grad_norm": 0.7964754563506287,
+      "learning_rate": 0.003,
+      "loss": 4.2851,
+      "step": 1289
+    },
+    {
+      "epoch": 0.0129,
+      "grad_norm": 0.7466700913788555,
+      "learning_rate": 0.003,
+      "loss": 4.3108,
+      "step": 1290
+    },
+    {
+      "epoch": 0.01291,
+      "grad_norm": 0.7257835946382555,
+      "learning_rate": 0.003,
+      "loss": 4.2878,
+      "step": 1291
+    },
+    {
+      "epoch": 0.01292,
+      "grad_norm": 0.7356576793031216,
+      "learning_rate": 0.003,
+      "loss": 4.3222,
+      "step": 1292
+    },
+    {
+      "epoch": 0.01293,
+      "grad_norm": 0.7652258994712906,
+      "learning_rate": 0.003,
+      "loss": 4.312,
+      "step": 1293
+    },
+    {
+      "epoch": 0.01294,
+      "grad_norm": 0.8074663392977137,
+      "learning_rate": 0.003,
+      "loss": 4.2851,
+      "step": 1294
+    },
+    {
+      "epoch": 0.01295,
+      "grad_norm": 0.8419246282427761,
+      "learning_rate": 0.003,
+      "loss": 4.3171,
+      "step": 1295
+    },
+    {
+      "epoch": 0.01296,
+      "grad_norm": 0.9067886743130651,
+      "learning_rate": 0.003,
+      "loss": 4.2936,
+      "step": 1296
+    },
+    {
+      "epoch": 0.01297,
+      "grad_norm": 0.9867944428505203,
+      "learning_rate": 0.003,
+      "loss": 4.3168,
+      "step": 1297
+    },
+    {
+      "epoch": 0.01298,
+      "grad_norm": 0.9008581643124973,
+      "learning_rate": 0.003,
+      "loss": 4.2927,
+      "step": 1298
+    },
+    {
+      "epoch": 0.01299,
+      "grad_norm": 0.9659427448976995,
+      "learning_rate": 0.003,
+      "loss": 4.3085,
+      "step": 1299
+    },
+    {
+      "epoch": 0.013,
+      "grad_norm": 1.004138764919463,
+      "learning_rate": 0.003,
+      "loss": 4.3011,
+      "step": 1300
+    },
+    {
+      "epoch": 0.01301,
+      "grad_norm": 0.9518499756671687,
+      "learning_rate": 0.003,
+      "loss": 4.3191,
+      "step": 1301
+    },
+    {
+      "epoch": 0.01302,
+      "grad_norm": 0.8711824338075493,
+      "learning_rate": 0.003,
+      "loss": 4.361,
+      "step": 1302
+    },
+    {
+      "epoch": 0.01303,
+      "grad_norm": 0.8676764239234932,
+      "learning_rate": 0.003,
+      "loss": 4.3063,
+      "step": 1303
+    },
+    {
+      "epoch": 0.01304,
+      "grad_norm": 0.7125360490503003,
+      "learning_rate": 0.003,
+      "loss": 4.3284,
+      "step": 1304
+    },
+    {
+      "epoch": 0.01305,
+      "grad_norm": 0.6046564741110017,
+      "learning_rate": 0.003,
+      "loss": 4.3089,
+      "step": 1305
+    },
+    {
+      "epoch": 0.01306,
+      "grad_norm": 0.5647927538938984,
+      "learning_rate": 0.003,
+      "loss": 4.2992,
+      "step": 1306
+    },
+    {
+      "epoch": 0.01307,
+      "grad_norm": 0.5913576309809552,
+      "learning_rate": 0.003,
+      "loss": 4.3149,
+      "step": 1307
+    },
+    {
+      "epoch": 0.01308,
+      "grad_norm": 0.5924251650199668,
+      "learning_rate": 0.003,
+      "loss": 4.2932,
+      "step": 1308
+    },
+    {
+      "epoch": 0.01309,
+      "grad_norm": 0.507504674715556,
+      "learning_rate": 0.003,
+      "loss": 4.326,
+      "step": 1309
+    },
+    {
+      "epoch": 0.0131,
+      "grad_norm": 0.6387323300673942,
+      "learning_rate": 0.003,
+      "loss": 4.2756,
+      "step": 1310
+    },
+    {
+      "epoch": 0.01311,
+      "grad_norm": 0.7281996988052075,
+      "learning_rate": 0.003,
+      "loss": 4.3202,
+      "step": 1311
+    },
+    {
+      "epoch": 0.01312,
+      "grad_norm": 0.8834351506579509,
+      "learning_rate": 0.003,
+      "loss": 4.2908,
+      "step": 1312
+    },
+    {
+      "epoch": 0.01313,
+      "grad_norm": 0.840185011015699,
+      "learning_rate": 0.003,
+      "loss": 4.2898,
+      "step": 1313
+    },
+    {
+      "epoch": 0.01314,
+      "grad_norm": 0.6830490768340751,
+      "learning_rate": 0.003,
+      "loss": 4.2745,
+      "step": 1314
+    },
+    {
+      "epoch": 0.01315,
+      "grad_norm": 0.6358981723627178,
+      "learning_rate": 0.003,
+      "loss": 4.2992,
+      "step": 1315
+    },
+    {
+      "epoch": 0.01316,
+      "grad_norm": 0.6007791887068558,
+      "learning_rate": 0.003,
+      "loss": 4.2829,
+      "step": 1316
+    },
+    {
+      "epoch": 0.01317,
+      "grad_norm": 0.5048125779943076,
+      "learning_rate": 0.003,
+      "loss": 4.3051,
+      "step": 1317
+    },
+    {
+      "epoch": 0.01318,
+      "grad_norm": 0.49450675942854344,
+      "learning_rate": 0.003,
+      "loss": 4.2837,
+      "step": 1318
+    },
+    {
+      "epoch": 0.01319,
+      "grad_norm": 0.5258905508186564,
+      "learning_rate": 0.003,
+      "loss": 4.292,
+      "step": 1319
+    },
+    {
+      "epoch": 0.0132,
+      "grad_norm": 0.5585710036479011,
+      "learning_rate": 0.003,
+      "loss": 4.2888,
+      "step": 1320
+    },
+    {
+      "epoch": 0.01321,
+      "grad_norm": 0.5365220915283813,
+      "learning_rate": 0.003,
+      "loss": 4.2608,
+      "step": 1321
+    },
+    {
+      "epoch": 0.01322,
+      "grad_norm": 0.5557887511106342,
+      "learning_rate": 0.003,
+      "loss": 4.292,
+      "step": 1322
+    },
+    {
+      "epoch": 0.01323,
+      "grad_norm": 0.5531377716759448,
+      "learning_rate": 0.003,
+      "loss": 4.2837,
+      "step": 1323
+    },
+    {
+      "epoch": 0.01324,
+      "grad_norm": 0.48156733948461256,
+      "learning_rate": 0.003,
+      "loss": 4.2672,
+      "step": 1324
+    },
+    {
+      "epoch": 0.01325,
+      "grad_norm": 0.4219074369900267,
+      "learning_rate": 0.003,
+      "loss": 4.2693,
+      "step": 1325
+    },
+    {
+      "epoch": 0.01326,
+      "grad_norm": 0.43890778305013683,
+      "learning_rate": 0.003,
+      "loss": 4.2559,
+      "step": 1326
+    },
+    {
+      "epoch": 0.01327,
+      "grad_norm": 0.4164892745664214,
+      "learning_rate": 0.003,
+      "loss": 4.2724,
+      "step": 1327
+    },
+    {
+      "epoch": 0.01328,
+      "grad_norm": 0.49229273243603294,
+      "learning_rate": 0.003,
+      "loss": 4.2646,
+      "step": 1328
+    },
+    {
+      "epoch": 0.01329,
+      "grad_norm": 0.6121730970661665,
+      "learning_rate": 0.003,
+      "loss": 4.2329,
+      "step": 1329
+    },
+    {
+      "epoch": 0.0133,
+      "grad_norm": 0.8848354653817555,
+      "learning_rate": 0.003,
+      "loss": 4.282,
+      "step": 1330
+    },
+    {
+      "epoch": 0.01331,
+      "grad_norm": 1.0334105334221095,
+      "learning_rate": 0.003,
+      "loss": 4.2945,
+      "step": 1331
+    },
+    {
+      "epoch": 0.01332,
+      "grad_norm": 0.8234894915328277,
+      "learning_rate": 0.003,
+      "loss": 4.2994,
+      "step": 1332
+    },
+    {
+      "epoch": 0.01333,
+      "grad_norm": 0.7856986790322864,
+      "learning_rate": 0.003,
+      "loss": 4.2716,
+      "step": 1333
+    },
+    {
+      "epoch": 0.01334,
+      "grad_norm": 0.7455452553988269,
+      "learning_rate": 0.003,
+      "loss": 4.2662,
+      "step": 1334
+    },
+    {
+      "epoch": 0.01335,
+      "grad_norm": 0.623751194689146,
+      "learning_rate": 0.003,
+      "loss": 4.2787,
+      "step": 1335
+    },
+    {
+      "epoch": 0.01336,
+      "grad_norm": 0.8238963440270202,
+      "learning_rate": 0.003,
+      "loss": 4.2839,
+      "step": 1336
+    },
+    {
+      "epoch": 0.01337,
+      "grad_norm": 0.8159528784128957,
+      "learning_rate": 0.003,
+      "loss": 4.3035,
+      "step": 1337
+    },
+    {
+      "epoch": 0.01338,
+      "grad_norm": 0.691075290250966,
+      "learning_rate": 0.003,
+      "loss": 4.2694,
+      "step": 1338
+    },
+    {
+      "epoch": 0.01339,
+      "grad_norm": 0.6872888519447673,
+      "learning_rate": 0.003,
+      "loss": 4.2893,
+      "step": 1339
+    },
+    {
+      "epoch": 0.0134,
+      "grad_norm": 0.6938872016813308,
+      "learning_rate": 0.003,
+      "loss": 4.2701,
+      "step": 1340
+    },
+    {
+      "epoch": 0.01341,
+      "grad_norm": 0.7051901323106005,
+      "learning_rate": 0.003,
+      "loss": 4.2883,
+      "step": 1341
+    },
+    {
+      "epoch": 0.01342,
+      "grad_norm": 0.7365511131614688,
+      "learning_rate": 0.003,
+      "loss": 4.257,
+      "step": 1342
+    },
+    {
+      "epoch": 0.01343,
+      "grad_norm": 0.7564141855011662,
+      "learning_rate": 0.003,
+      "loss": 4.2701,
+      "step": 1343
+    },
+    {
+      "epoch": 0.01344,
+      "grad_norm": 0.695387503279461,
+      "learning_rate": 0.003,
+      "loss": 4.2649,
+      "step": 1344
+    },
+    {
+      "epoch": 0.01345,
+      "grad_norm": 0.6914588150610467,
+      "learning_rate": 0.003,
+      "loss": 4.3033,
+      "step": 1345
+    },
+    {
+      "epoch": 0.01346,
+      "grad_norm": 0.6768362060970508,
+      "learning_rate": 0.003,
+      "loss": 4.277,
+      "step": 1346
+    },
+    {
+      "epoch": 0.01347,
+      "grad_norm": 0.5492073753536512,
+      "learning_rate": 0.003,
+      "loss": 4.2633,
+      "step": 1347
+    },
+    {
+      "epoch": 0.01348,
+      "grad_norm": 0.5626283903224933,
+      "learning_rate": 0.003,
+      "loss": 4.2635,
+      "step": 1348
+    },
+    {
+      "epoch": 0.01349,
+      "grad_norm": 0.6438865489267382,
+      "learning_rate": 0.003,
+      "loss": 4.2856,
+      "step": 1349
+    },
+    {
+      "epoch": 0.0135,
+      "grad_norm": 0.7199005579630849,
+      "learning_rate": 0.003,
+      "loss": 4.2796,
+      "step": 1350
+    },
+    {
+      "epoch": 0.01351,
+      "grad_norm": 0.5428932527008233,
+      "learning_rate": 0.003,
+      "loss": 4.3038,
+      "step": 1351
+    },
+    {
+      "epoch": 0.01352,
+      "grad_norm": 0.5284901745728212,
+      "learning_rate": 0.003,
+      "loss": 4.2779,
+      "step": 1352
+    },
+    {
+      "epoch": 0.01353,
+      "grad_norm": 0.5600878945602594,
+      "learning_rate": 0.003,
+      "loss": 4.2635,
+      "step": 1353
+    },
+    {
+      "epoch": 0.01354,
+      "grad_norm": 0.6391444612725596,
+      "learning_rate": 0.003,
+      "loss": 4.2525,
+      "step": 1354
+    },
+    {
+      "epoch": 0.01355,
+      "grad_norm": 0.5641606375474643,
+      "learning_rate": 0.003,
+      "loss": 4.2634,
+      "step": 1355
+    },
+    {
+      "epoch": 0.01356,
+      "grad_norm": 0.5080744885475224,
+      "learning_rate": 0.003,
+      "loss": 4.2545,
+      "step": 1356
+    },
+    {
+      "epoch": 0.01357,
+      "grad_norm": 0.5093639445431166,
+      "learning_rate": 0.003,
+      "loss": 4.2441,
+      "step": 1357
+    },
+    {
+      "epoch": 0.01358,
+      "grad_norm": 0.5049437394862782,
+      "learning_rate": 0.003,
+      "loss": 4.2383,
+      "step": 1358
+    },
+    {
+      "epoch": 0.01359,
+      "grad_norm": 0.534762392832106,
+      "learning_rate": 0.003,
+      "loss": 4.2789,
+      "step": 1359
+    },
+    {
+      "epoch": 0.0136,
+      "grad_norm": 0.5654562399234901,
+      "learning_rate": 0.003,
+      "loss": 4.2447,
+      "step": 1360
+    },
+    {
+      "epoch": 0.01361,
+      "grad_norm": 0.7911685516807686,
+      "learning_rate": 0.003,
+      "loss": 4.2666,
+      "step": 1361
+    },
+    {
+      "epoch": 0.01362,
+      "grad_norm": 1.133172158468294,
+      "learning_rate": 0.003,
+      "loss": 4.3073,
+      "step": 1362
+    },
+    {
+      "epoch": 0.01363,
+      "grad_norm": 0.9631930238400119,
+      "learning_rate": 0.003,
+      "loss": 4.2987,
+      "step": 1363
+    },
+    {
+      "epoch": 0.01364,
+      "grad_norm": 0.7443319461425727,
+      "learning_rate": 0.003,
+      "loss": 4.2839,
+      "step": 1364
+    },
+    {
+      "epoch": 0.01365,
+      "grad_norm": 0.7368707977522249,
+      "learning_rate": 0.003,
+      "loss": 4.3041,
+      "step": 1365
+    },
+    {
+      "epoch": 0.01366,
+      "grad_norm": 0.6506933778775009,
+      "learning_rate": 0.003,
+      "loss": 4.255,
+      "step": 1366
+    },
+    {
+      "epoch": 0.01367,
+      "grad_norm": 0.7425630695378723,
+      "learning_rate": 0.003,
+      "loss": 4.3072,
+      "step": 1367
+    },
+    {
+      "epoch": 0.01368,
+      "grad_norm": 0.7495522896004446,
+      "learning_rate": 0.003,
+      "loss": 4.2833,
+      "step": 1368
+    },
+    {
+      "epoch": 0.01369,
+      "grad_norm": 0.7542187487500857,
+      "learning_rate": 0.003,
+      "loss": 4.2721,
+      "step": 1369
+    },
+    {
+      "epoch": 0.0137,
+      "grad_norm": 0.9330809300322568,
+      "learning_rate": 0.003,
+      "loss": 4.3029,
+      "step": 1370
+    },
+    {
+      "epoch": 0.01371,
+      "grad_norm": 1.2015218016331504,
+      "learning_rate": 0.003,
+      "loss": 4.3156,
+      "step": 1371
+    },
+    {
+      "epoch": 0.01372,
+      "grad_norm": 1.0453049109497266,
+      "learning_rate": 0.003,
+      "loss": 4.2577,
+      "step": 1372
+    },
+    {
+      "epoch": 0.01373,
+      "grad_norm": 1.086666562724284,
+      "learning_rate": 0.003,
+      "loss": 4.308,
+      "step": 1373
+    },
+    {
+      "epoch": 0.01374,
+      "grad_norm": 1.0320592526365153,
+      "learning_rate": 0.003,
+      "loss": 4.3227,
+      "step": 1374
+    },
+    {
+      "epoch": 0.01375,
+      "grad_norm": 1.1967086046120219,
+      "learning_rate": 0.003,
+      "loss": 4.2884,
+      "step": 1375
+    },
+    {
+      "epoch": 0.01376,
+      "grad_norm": 0.8548157633990001,
+      "learning_rate": 0.003,
+      "loss": 4.307,
+      "step": 1376
+    },
+    {
+      "epoch": 0.01377,
+      "grad_norm": 0.9102072373918407,
+      "learning_rate": 0.003,
+      "loss": 4.2985,
+      "step": 1377
+    },
+    {
+      "epoch": 0.01378,
+      "grad_norm": 0.9843975361934545,
+      "learning_rate": 0.003,
+      "loss": 4.3358,
+      "step": 1378
+    },
+    {
+      "epoch": 0.01379,
+      "grad_norm": 1.0283723711953892,
+      "learning_rate": 0.003,
+      "loss": 4.3102,
+      "step": 1379
+    },
+    {
+      "epoch": 0.0138,
+      "grad_norm": 0.9006954176701093,
+      "learning_rate": 0.003,
+      "loss": 4.3045,
+      "step": 1380
+    },
+    {
+      "epoch": 0.01381,
+      "grad_norm": 1.123717795950056,
+      "learning_rate": 0.003,
+      "loss": 4.3217,
+      "step": 1381
+    },
+    {
+      "epoch": 0.01382,
+      "grad_norm": 1.0986902300743402,
+      "learning_rate": 0.003,
+      "loss": 4.3218,
+      "step": 1382
+    },
+    {
+      "epoch": 0.01383,
+      "grad_norm": 0.9410643097534779,
+      "learning_rate": 0.003,
+      "loss": 4.3113,
+      "step": 1383
+    },
+    {
+      "epoch": 0.01384,
+      "grad_norm": 0.8436673398281763,
+      "learning_rate": 0.003,
+      "loss": 4.3327,
+      "step": 1384
+    },
+    {
+      "epoch": 0.01385,
+      "grad_norm": 0.8422095588989059,
+      "learning_rate": 0.003,
+      "loss": 4.3233,
+      "step": 1385
+    },
+    {
+      "epoch": 0.01386,
+      "grad_norm": 0.9633211233231406,
+      "learning_rate": 0.003,
+      "loss": 4.3006,
+      "step": 1386
+    },
+    {
+      "epoch": 0.01387,
+      "grad_norm": 1.2716300561274656,
+      "learning_rate": 0.003,
+      "loss": 4.3052,
+      "step": 1387
+    },
+    {
+      "epoch": 0.01388,
+      "grad_norm": 0.786746450686237,
+      "learning_rate": 0.003,
+      "loss": 4.33,
+      "step": 1388
+    },
+    {
+      "epoch": 0.01389,
+      "grad_norm": 0.763957561776965,
+      "learning_rate": 0.003,
+      "loss": 4.3069,
+      "step": 1389
+    },
+    {
+      "epoch": 0.0139,
+      "grad_norm": 0.7874363277644975,
+      "learning_rate": 0.003,
+      "loss": 4.3041,
+      "step": 1390
+    },
+    {
+      "epoch": 0.01391,
+      "grad_norm": 0.7493071050038139,
+      "learning_rate": 0.003,
+      "loss": 4.3036,
+      "step": 1391
+    },
+    {
+      "epoch": 0.01392,
+      "grad_norm": 0.5930612778435612,
+      "learning_rate": 0.003,
+      "loss": 4.3076,
+      "step": 1392
+    },
+    {
+      "epoch": 0.01393,
+      "grad_norm": 0.5012167316836135,
+      "learning_rate": 0.003,
+      "loss": 4.2738,
+      "step": 1393
+    },
+    {
+      "epoch": 0.01394,
+      "grad_norm": 0.4948661937091383,
+      "learning_rate": 0.003,
+      "loss": 4.2568,
+      "step": 1394
+    },
+    {
+      "epoch": 0.01395,
+      "grad_norm": 0.4622888611133768,
+      "learning_rate": 0.003,
+      "loss": 4.2868,
+      "step": 1395
+    },
+    {
+      "epoch": 0.01396,
+      "grad_norm": 0.4205886993889684,
+      "learning_rate": 0.003,
+      "loss": 4.275,
+      "step": 1396
+    },
+    {
+      "epoch": 0.01397,
+      "grad_norm": 0.47767051412590744,
+      "learning_rate": 0.003,
+      "loss": 4.2862,
+      "step": 1397
+    },
+    {
+      "epoch": 0.01398,
+      "grad_norm": 0.417341308437963,
+      "learning_rate": 0.003,
+      "loss": 4.2647,
+      "step": 1398
+    },
+    {
+      "epoch": 0.01399,
+      "grad_norm": 0.38332601666132154,
+      "learning_rate": 0.003,
+      "loss": 4.2835,
+      "step": 1399
+    },
+    {
+      "epoch": 0.014,
+      "grad_norm": 0.49255586643940574,
+      "learning_rate": 0.003,
+      "loss": 4.2444,
+      "step": 1400
+    },
+    {
+      "epoch": 0.01401,
+      "grad_norm": 0.8450629843994065,
+      "learning_rate": 0.003,
+      "loss": 4.2739,
+      "step": 1401
+    },
+    {
+      "epoch": 0.01402,
+      "grad_norm": 1.0913061976723808,
+      "learning_rate": 0.003,
+      "loss": 4.3172,
+      "step": 1402
+    },
+    {
+      "epoch": 0.01403,
+      "grad_norm": 0.6222948640126749,
+      "learning_rate": 0.003,
+      "loss": 4.2634,
+      "step": 1403
+    },
+    {
+      "epoch": 0.01404,
+      "grad_norm": 0.6692390535634434,
+      "learning_rate": 0.003,
+      "loss": 4.291,
+      "step": 1404
+    },
+    {
+      "epoch": 0.01405,
+      "grad_norm": 0.531565118139576,
+      "learning_rate": 0.003,
+      "loss": 4.2997,
+      "step": 1405
+    },
+    {
+      "epoch": 0.01406,
+      "grad_norm": 0.4824477961295427,
+      "learning_rate": 0.003,
+      "loss": 4.2779,
+      "step": 1406
+    },
+    {
+      "epoch": 0.01407,
+      "grad_norm": 0.5759838031356613,
+      "learning_rate": 0.003,
+      "loss": 4.2942,
+      "step": 1407
+    },
+    {
+      "epoch": 0.01408,
+      "grad_norm": 0.4476558981995588,
+      "learning_rate": 0.003,
+      "loss": 4.245,
+      "step": 1408
+    },
+    {
+      "epoch": 0.01409,
+      "grad_norm": 0.43291671999910936,
+      "learning_rate": 0.003,
+      "loss": 4.2753,
+      "step": 1409
+    },
+    {
+      "epoch": 0.0141,
+      "grad_norm": 0.48354199581441126,
+      "learning_rate": 0.003,
+      "loss": 4.2565,
+      "step": 1410
+    },
+    {
+      "epoch": 0.01411,
+      "grad_norm": 0.4777579756136276,
+      "learning_rate": 0.003,
+      "loss": 4.2302,
+      "step": 1411
+    },
+    {
+      "epoch": 0.01412,
+      "grad_norm": 0.5224654831649898,
+      "learning_rate": 0.003,
+      "loss": 4.2693,
+      "step": 1412
+    },
+    {
+      "epoch": 0.01413,
+      "grad_norm": 0.5669597734894755,
+      "learning_rate": 0.003,
+      "loss": 4.2639,
+      "step": 1413
+    },
+    {
+      "epoch": 0.01414,
+      "grad_norm": 0.5645597477136786,
+      "learning_rate": 0.003,
+      "loss": 4.2453,
+      "step": 1414
+    },
+    {
+      "epoch": 0.01415,
+      "grad_norm": 0.5887759339552011,
+      "learning_rate": 0.003,
+      "loss": 4.238,
+      "step": 1415
+    },
+    {
+      "epoch": 0.01416,
+      "grad_norm": 0.6104838140849604,
+      "learning_rate": 0.003,
+      "loss": 4.2641,
+      "step": 1416
+    },
+    {
+      "epoch": 0.01417,
+      "grad_norm": 0.6908167320390044,
+      "learning_rate": 0.003,
+      "loss": 4.2559,
+      "step": 1417
+    },
+    {
+      "epoch": 0.01418,
+      "grad_norm": 0.8241127076876378,
+      "learning_rate": 0.003,
+      "loss": 4.28,
+      "step": 1418
+    },
+    {
+      "epoch": 0.01419,
+      "grad_norm": 0.8554302831681504,
+      "learning_rate": 0.003,
+      "loss": 4.2733,
+      "step": 1419
+    },
+    {
+      "epoch": 0.0142,
+      "grad_norm": 0.8153359619668534,
+      "learning_rate": 0.003,
+      "loss": 4.28,
+      "step": 1420
+    },
+    {
+      "epoch": 0.01421,
+      "grad_norm": 0.808004884443839,
+      "learning_rate": 0.003,
+      "loss": 4.2527,
+      "step": 1421
+    },
+    {
+      "epoch": 0.01422,
+      "grad_norm": 0.6302573298023686,
+      "learning_rate": 0.003,
+      "loss": 4.2516,
+      "step": 1422
+    },
+    {
+      "epoch": 0.01423,
+      "grad_norm": 0.6834582033971294,
+      "learning_rate": 0.003,
+      "loss": 4.2585,
+      "step": 1423
+    },
+    {
+      "epoch": 0.01424,
+      "grad_norm": 0.7665806417295189,
+      "learning_rate": 0.003,
+      "loss": 4.2761,
+      "step": 1424
+    },
+    {
+      "epoch": 0.01425,
+      "grad_norm": 0.8479056405573007,
+      "learning_rate": 0.003,
+      "loss": 4.2857,
+      "step": 1425
+    },
+    {
+      "epoch": 0.01426,
+      "grad_norm": 0.8307259541179206,
+      "learning_rate": 0.003,
+      "loss": 4.2636,
+      "step": 1426
+    },
+    {
+      "epoch": 0.01427,
+      "grad_norm": 0.7328418116871758,
+      "learning_rate": 0.003,
+      "loss": 4.257,
+      "step": 1427
+    },
+    {
+      "epoch": 0.01428,
+      "grad_norm": 0.834478644245489,
+      "learning_rate": 0.003,
+      "loss": 4.2596,
+      "step": 1428
+    },
+    {
+      "epoch": 0.01429,
+      "grad_norm": 0.8679311866430979,
+      "learning_rate": 0.003,
+      "loss": 4.2742,
+      "step": 1429
+    },
+    {
+      "epoch": 0.0143,
+      "grad_norm": 0.9567780417694972,
+      "learning_rate": 0.003,
+      "loss": 4.2902,
+      "step": 1430
+    },
+    {
+      "epoch": 0.01431,
+      "grad_norm": 0.99274156172833,
+      "learning_rate": 0.003,
+      "loss": 4.2901,
+      "step": 1431
+    },
+    {
+      "epoch": 0.01432,
+      "grad_norm": 0.9575505507003149,
+      "learning_rate": 0.003,
+      "loss": 4.2782,
+      "step": 1432
+    },
+    {
+      "epoch": 0.01433,
+      "grad_norm": 1.0120649680762432,
+      "learning_rate": 0.003,
+      "loss": 4.2802,
+      "step": 1433
+    },
+    {
+      "epoch": 0.01434,
+      "grad_norm": 0.9698542777423387,
+      "learning_rate": 0.003,
+      "loss": 4.2694,
+      "step": 1434
+    },
+    {
+      "epoch": 0.01435,
+      "grad_norm": 0.8592446572371273,
+      "learning_rate": 0.003,
+      "loss": 4.2745,
+      "step": 1435
+    },
+    {
+      "epoch": 0.01436,
+      "grad_norm": 0.7299491282894184,
+      "learning_rate": 0.003,
+      "loss": 4.2863,
+      "step": 1436
+    },
+    {
+      "epoch": 0.01437,
+      "grad_norm": 0.6523703034324407,
+      "learning_rate": 0.003,
+      "loss": 4.296,
+      "step": 1437
+    },
+    {
+      "epoch": 0.01438,
+      "grad_norm": 0.6201165515106615,
+      "learning_rate": 0.003,
+      "loss": 4.2864,
+      "step": 1438
+    },
+    {
+      "epoch": 0.01439,
+      "grad_norm": 0.6536973612494624,
+      "learning_rate": 0.003,
+      "loss": 4.25,
+      "step": 1439
+    },
+    {
+      "epoch": 0.0144,
+      "grad_norm": 0.729776581664813,
+      "learning_rate": 0.003,
+      "loss": 4.2648,
+      "step": 1440
+    },
+    {
+      "epoch": 0.01441,
+      "grad_norm": 0.7206829643898741,
+      "learning_rate": 0.003,
+      "loss": 4.2702,
+      "step": 1441
+    },
+    {
+      "epoch": 0.01442,
+      "grad_norm": 0.7419558696417659,
+      "learning_rate": 0.003,
+      "loss": 4.2561,
+      "step": 1442
+    },
+    {
+      "epoch": 0.01443,
+      "grad_norm": 0.8093095130841327,
+      "learning_rate": 0.003,
+      "loss": 4.3153,
+      "step": 1443
+    },
+    {
+      "epoch": 0.01444,
+      "grad_norm": 0.7933277975079275,
+      "learning_rate": 0.003,
+      "loss": 4.2825,
+      "step": 1444
+    },
+    {
+      "epoch": 0.01445,
+      "grad_norm": 0.7264821716906565,
+      "learning_rate": 0.003,
+      "loss": 4.2399,
+      "step": 1445
+    },
+    {
+      "epoch": 0.01446,
+      "grad_norm": 0.6240641235999153,
+      "learning_rate": 0.003,
+      "loss": 4.2506,
+      "step": 1446
+    },
+    {
+      "epoch": 0.01447,
+      "grad_norm": 0.5288921284639453,
+      "learning_rate": 0.003,
+      "loss": 4.256,
+      "step": 1447
+    },
+    {
+      "epoch": 0.01448,
+      "grad_norm": 0.534171003657093,
+      "learning_rate": 0.003,
+      "loss": 4.2664,
+      "step": 1448
+    },
+    {
+      "epoch": 0.01449,
+      "grad_norm": 0.504252276028137,
+      "learning_rate": 0.003,
+      "loss": 4.2565,
+      "step": 1449
+    },
+    {
+      "epoch": 0.0145,
+      "grad_norm": 0.44556324681710546,
+      "learning_rate": 0.003,
+      "loss": 4.27,
+      "step": 1450
+    },
+    {
+      "epoch": 0.01451,
+      "grad_norm": 0.4269835752600227,
+      "learning_rate": 0.003,
+      "loss": 4.2459,
+      "step": 1451
+    },
+    {
+      "epoch": 0.01452,
+      "grad_norm": 0.43297277393213274,
+      "learning_rate": 0.003,
+      "loss": 4.2457,
+      "step": 1452
+    },
+    {
+      "epoch": 0.01453,
+      "grad_norm": 0.48066055951097997,
+      "learning_rate": 0.003,
+      "loss": 4.2578,
+      "step": 1453
+    },
+    {
+      "epoch": 0.01454,
+      "grad_norm": 0.7307010489462921,
+      "learning_rate": 0.003,
+      "loss": 4.2308,
+      "step": 1454
+    },
+    {
+      "epoch": 0.01455,
+      "grad_norm": 1.061032754030789,
+      "learning_rate": 0.003,
+      "loss": 4.2921,
+      "step": 1455
+    },
+    {
+      "epoch": 0.01456,
+      "grad_norm": 0.8307341818819957,
+      "learning_rate": 0.003,
+      "loss": 4.2586,
+      "step": 1456
+    },
+    {
+      "epoch": 0.01457,
+      "grad_norm": 0.6035503218291812,
+      "learning_rate": 0.003,
+      "loss": 4.2382,
+      "step": 1457
+    },
+    {
+      "epoch": 0.01458,
+      "grad_norm": 0.6933692267109066,
+      "learning_rate": 0.003,
+      "loss": 4.242,
+      "step": 1458
+    },
+    {
+      "epoch": 0.01459,
+      "grad_norm": 0.6220443333798514,
+      "learning_rate": 0.003,
+      "loss": 4.2607,
+      "step": 1459
+    },
+    {
+      "epoch": 0.0146,
+      "grad_norm": 0.8143223428105976,
+      "learning_rate": 0.003,
+      "loss": 4.2715,
+      "step": 1460
+    },
+    {
+      "epoch": 0.01461,
+      "grad_norm": 0.7987144915177533,
+      "learning_rate": 0.003,
+      "loss": 4.2513,
+      "step": 1461
+    },
+    {
+      "epoch": 0.01462,
+      "grad_norm": 0.6408744129720689,
+      "learning_rate": 0.003,
+      "loss": 4.2512,
+      "step": 1462
+    },
+    {
+      "epoch": 0.01463,
+      "grad_norm": 0.7757864438293093,
+      "learning_rate": 0.003,
+      "loss": 4.2548,
+      "step": 1463
+    },
+    {
+      "epoch": 0.01464,
+      "grad_norm": 0.7699033474111219,
+      "learning_rate": 0.003,
+      "loss": 4.2388,
+      "step": 1464
+    },
+    {
+      "epoch": 0.01465,
+      "grad_norm": 0.5797229129062035,
+      "learning_rate": 0.003,
+      "loss": 4.2736,
+      "step": 1465
+    },
+    {
+      "epoch": 0.01466,
+      "grad_norm": 0.611021916593894,
+      "learning_rate": 0.003,
+      "loss": 4.238,
+      "step": 1466
+    },
+    {
+      "epoch": 0.01467,
+      "grad_norm": 0.7716393804810123,
+      "learning_rate": 0.003,
+      "loss": 4.2809,
+      "step": 1467
+    },
+    {
+      "epoch": 0.01468,
+      "grad_norm": 0.684598948135191,
+      "learning_rate": 0.003,
+      "loss": 4.2576,
+      "step": 1468
+    },
+    {
+      "epoch": 0.01469,
+      "grad_norm": 0.5983303592308452,
+      "learning_rate": 0.003,
+      "loss": 4.2537,
+      "step": 1469
+    },
+    {
+      "epoch": 0.0147,
+      "grad_norm": 0.5951720244159906,
+      "learning_rate": 0.003,
+      "loss": 4.246,
+      "step": 1470
+    },
+    {
+      "epoch": 0.01471,
+      "grad_norm": 0.595230494931454,
+      "learning_rate": 0.003,
+      "loss": 4.2579,
+      "step": 1471
+    },
+    {
+      "epoch": 0.01472,
+      "grad_norm": 0.5179540783713997,
+      "learning_rate": 0.003,
+      "loss": 4.2385,
+      "step": 1472
+    },
+    {
+      "epoch": 0.01473,
+      "grad_norm": 0.513255678190744,
+      "learning_rate": 0.003,
+      "loss": 4.2524,
+      "step": 1473
+    },
+    {
+      "epoch": 0.01474,
+      "grad_norm": 0.5312567488001211,
+      "learning_rate": 0.003,
+      "loss": 4.2368,
+      "step": 1474
+    },
+    {
+      "epoch": 0.01475,
+      "grad_norm": 0.5231277112786824,
+      "learning_rate": 0.003,
+      "loss": 4.2547,
+      "step": 1475
+    },
+    {
+      "epoch": 0.01476,
+      "grad_norm": 0.524915592276574,
+      "learning_rate": 0.003,
+      "loss": 4.2412,
+      "step": 1476
+    },
+    {
+      "epoch": 0.01477,
+      "grad_norm": 0.4868094804842935,
+      "learning_rate": 0.003,
+      "loss": 4.2453,
+      "step": 1477
+    },
+    {
+      "epoch": 0.01478,
+      "grad_norm": 0.46072622581307954,
+      "learning_rate": 0.003,
+      "loss": 4.2277,
+      "step": 1478
+    },
+    {
+      "epoch": 0.01479,
+      "grad_norm": 0.5672195126213446,
+      "learning_rate": 0.003,
+      "loss": 4.2625,
+      "step": 1479
+    },
+    {
+      "epoch": 0.0148,
+      "grad_norm": 0.7550630041249586,
+      "learning_rate": 0.003,
+      "loss": 4.2383,
+      "step": 1480
+    },
+    {
+      "epoch": 0.01481,
+      "grad_norm": 0.9561266542090932,
+      "learning_rate": 0.003,
+      "loss": 4.2523,
+      "step": 1481
+    },
+    {
+      "epoch": 0.01482,
+      "grad_norm": 1.4635954400671285,
+      "learning_rate": 0.003,
+      "loss": 4.2602,
+      "step": 1482
+    },
+    {
+      "epoch": 0.01483,
+      "grad_norm": 0.7350989176290839,
+      "learning_rate": 0.003,
+      "loss": 4.2401,
+      "step": 1483
+    },
+    {
+      "epoch": 0.01484,
+      "grad_norm": 0.687260607047077,
+      "learning_rate": 0.003,
+      "loss": 4.2906,
+      "step": 1484
+    },
+    {
+      "epoch": 0.01485,
+      "grad_norm": 0.6759306581993629,
+      "learning_rate": 0.003,
+      "loss": 4.2492,
+      "step": 1485
+    },
+    {
+      "epoch": 0.01486,
+      "grad_norm": 0.5738231891215967,
+      "learning_rate": 0.003,
+      "loss": 4.2438,
+      "step": 1486
+    },
+    {
+      "epoch": 0.01487,
+      "grad_norm": 0.67662740183017,
+      "learning_rate": 0.003,
+      "loss": 4.2486,
+      "step": 1487
+    },
+    {
+      "epoch": 0.01488,
+      "grad_norm": 0.6465587043004464,
+      "learning_rate": 0.003,
+      "loss": 4.2584,
+      "step": 1488
+    },
+    {
+      "epoch": 0.01489,
+      "grad_norm": 0.6278262825245318,
+      "learning_rate": 0.003,
+      "loss": 4.251,
+      "step": 1489
+    },
+    {
+      "epoch": 0.0149,
+      "grad_norm": 0.7125204242602325,
+      "learning_rate": 0.003,
+      "loss": 4.2472,
+      "step": 1490
+    },
+    {
+      "epoch": 0.01491,
+      "grad_norm": 0.739446677383364,
+      "learning_rate": 0.003,
+      "loss": 4.263,
+      "step": 1491
+    },
+    {
+      "epoch": 0.01492,
+      "grad_norm": 0.7351492446611294,
+      "learning_rate": 0.003,
+      "loss": 4.2298,
+      "step": 1492
+    },
+    {
+      "epoch": 0.01493,
+      "grad_norm": 0.7206112830630914,
+      "learning_rate": 0.003,
+      "loss": 4.2585,
+      "step": 1493
+    },
+    {
+      "epoch": 0.01494,
+      "grad_norm": 0.7389347083990496,
+      "learning_rate": 0.003,
+      "loss": 4.239,
+      "step": 1494
+    },
+    {
+      "epoch": 0.01495,
+      "grad_norm": 0.6924446817172788,
+      "learning_rate": 0.003,
+      "loss": 4.2476,
+      "step": 1495
+    },
+    {
+      "epoch": 0.01496,
+      "grad_norm": 0.6954181917083473,
+      "learning_rate": 0.003,
+      "loss": 4.2266,
+      "step": 1496
+    },
+    {
+      "epoch": 0.01497,
+      "grad_norm": 0.6994096236505176,
+      "learning_rate": 0.003,
+      "loss": 4.1995,
+      "step": 1497
+    },
+    {
+      "epoch": 0.01498,
+      "grad_norm": 0.737027067879948,
+      "learning_rate": 0.003,
+      "loss": 4.2577,
+      "step": 1498
+    },
+    {
+      "epoch": 0.01499,
+      "grad_norm": 0.8018965005148498,
+      "learning_rate": 0.003,
+      "loss": 4.2828,
+      "step": 1499
+    },
+    {
+      "epoch": 0.015,
+      "grad_norm": 0.673322646768884,
+      "learning_rate": 0.003,
+      "loss": 4.2302,
+      "step": 1500
+    },
+    {
+      "epoch": 0.01501,
+      "grad_norm": 0.6899086847228731,
+      "learning_rate": 0.003,
+      "loss": 4.23,
+      "step": 1501
+    },
+    {
+      "epoch": 0.01502,
+      "grad_norm": 0.6591283488970741,
+      "learning_rate": 0.003,
+      "loss": 4.2708,
+      "step": 1502
+    },
+    {
+      "epoch": 0.01503,
+      "grad_norm": 0.6852371530334846,
+      "learning_rate": 0.003,
+      "loss": 4.2501,
+      "step": 1503
+    },
+    {
+      "epoch": 0.01504,
+      "grad_norm": 0.6823538206406574,
+      "learning_rate": 0.003,
+      "loss": 4.2505,
+      "step": 1504
+    },
+    {
+      "epoch": 0.01505,
+      "grad_norm": 0.7899988696399359,
+      "learning_rate": 0.003,
+      "loss": 4.2395,
+      "step": 1505
+    },
+    {
+      "epoch": 0.01506,
+      "grad_norm": 0.8794700649213404,
+      "learning_rate": 0.003,
+      "loss": 4.2637,
+      "step": 1506
+    },
+    {
+      "epoch": 0.01507,
+      "grad_norm": 0.8573682870616134,
+      "learning_rate": 0.003,
+      "loss": 4.2788,
+      "step": 1507
+    },
+    {
+      "epoch": 0.01508,
+      "grad_norm": 0.9379452075670608,
+      "learning_rate": 0.003,
+      "loss": 4.257,
+      "step": 1508
+    },
+    {
+      "epoch": 0.01509,
+      "grad_norm": 1.155120366213554,
+      "learning_rate": 0.003,
+      "loss": 4.2868,
+      "step": 1509
+    },
+    {
+      "epoch": 0.0151,
+      "grad_norm": 0.8564988533087747,
+      "learning_rate": 0.003,
+      "loss": 4.2523,
+      "step": 1510
+    },
+    {
+      "epoch": 0.01511,
+      "grad_norm": 0.7943851233402885,
+      "learning_rate": 0.003,
+      "loss": 4.242,
+      "step": 1511
+    },
+    {
+      "epoch": 0.01512,
+      "grad_norm": 0.7290925664752087,
+      "learning_rate": 0.003,
+      "loss": 4.2483,
+      "step": 1512
+    },
+    {
+      "epoch": 0.01513,
+      "grad_norm": 0.8302189630029514,
+      "learning_rate": 0.003,
+      "loss": 4.2538,
+      "step": 1513
+    },
+    {
+      "epoch": 0.01514,
+      "grad_norm": 0.9180439778149861,
+      "learning_rate": 0.003,
+      "loss": 4.2658,
+      "step": 1514
+    },
+    {
+      "epoch": 0.01515,
+      "grad_norm": 0.8252164561072897,
+      "learning_rate": 0.003,
+      "loss": 4.2572,
+      "step": 1515
+    },
+    {
+      "epoch": 0.01516,
+      "grad_norm": 0.7235677802672953,
+      "learning_rate": 0.003,
+      "loss": 4.2628,
+      "step": 1516
+    },
+    {
+      "epoch": 0.01517,
+      "grad_norm": 0.669771960126658,
+      "learning_rate": 0.003,
+      "loss": 4.2114,
+      "step": 1517
+    },
+    {
+      "epoch": 0.01518,
+      "grad_norm": 0.6555410571298043,
+      "learning_rate": 0.003,
+      "loss": 4.248,
+      "step": 1518
+    },
+    {
+      "epoch": 0.01519,
+      "grad_norm": 0.7596573642242733,
+      "learning_rate": 0.003,
+      "loss": 4.2822,
+      "step": 1519
+    },
+    {
+      "epoch": 0.0152,
+      "grad_norm": 0.8963421668718663,
+      "learning_rate": 0.003,
+      "loss": 4.2455,
+      "step": 1520
+    },
+    {
+      "epoch": 0.01521,
+      "grad_norm": 1.008660081317184,
+      "learning_rate": 0.003,
+      "loss": 4.2626,
+      "step": 1521
+    },
+    {
+      "epoch": 0.01522,
+      "grad_norm": 1.0941342292979577,
+      "learning_rate": 0.003,
+      "loss": 4.2769,
+      "step": 1522
+    },
+    {
+      "epoch": 0.01523,
+      "grad_norm": 0.8501402046319685,
+      "learning_rate": 0.003,
+      "loss": 4.2571,
+      "step": 1523
+    },
+    {
+      "epoch": 0.01524,
+      "grad_norm": 0.9113983493960105,
+      "learning_rate": 0.003,
+      "loss": 4.2967,
+      "step": 1524
+    },
+    {
+      "epoch": 0.01525,
+      "grad_norm": 0.7627206625135139,
+      "learning_rate": 0.003,
+      "loss": 4.2846,
+      "step": 1525
+    },
+    {
+      "epoch": 0.01526,
+      "grad_norm": 0.7118386576342678,
+      "learning_rate": 0.003,
+      "loss": 4.2733,
+      "step": 1526
+    },
+    {
+      "epoch": 0.01527,
+      "grad_norm": 0.6832379742298202,
+      "learning_rate": 0.003,
+      "loss": 4.2396,
+      "step": 1527
+    },
+    {
+      "epoch": 0.01528,
+      "grad_norm": 0.6558129881498832,
+      "learning_rate": 0.003,
+      "loss": 4.2558,
+      "step": 1528
+    },
+    {
+      "epoch": 0.01529,
+      "grad_norm": 0.653598932900876,
+      "learning_rate": 0.003,
+      "loss": 4.2506,
+      "step": 1529
+    },
+    {
+      "epoch": 0.0153,
+      "grad_norm": 0.6481863699832326,
+      "learning_rate": 0.003,
+      "loss": 4.2569,
+      "step": 1530
+    },
+    {
+      "epoch": 0.01531,
+      "grad_norm": 0.7137971422876086,
+      "learning_rate": 0.003,
+      "loss": 4.2516,
+      "step": 1531
+    },
+    {
+      "epoch": 0.01532,
+      "grad_norm": 0.7682370963055534,
+      "learning_rate": 0.003,
+      "loss": 4.232,
+      "step": 1532
+    },
+    {
+      "epoch": 0.01533,
+      "grad_norm": 0.6869067765486859,
+      "learning_rate": 0.003,
+      "loss": 4.2552,
+      "step": 1533
+    },
+    {
+      "epoch": 0.01534,
+      "grad_norm": 0.6763479705568691,
+      "learning_rate": 0.003,
+      "loss": 4.251,
+      "step": 1534
+    },
+    {
+      "epoch": 0.01535,
+      "grad_norm": 0.6278011204652878,
+      "learning_rate": 0.003,
+      "loss": 4.2624,
+      "step": 1535
+    },
+    {
+      "epoch": 0.01536,
+      "grad_norm": 0.5449726408675756,
+      "learning_rate": 0.003,
+      "loss": 4.2371,
+      "step": 1536
+    },
+    {
+      "epoch": 0.01537,
+      "grad_norm": 0.5702070777355899,
+      "learning_rate": 0.003,
+      "loss": 4.2348,
+      "step": 1537
+    },
+    {
+      "epoch": 0.01538,
+      "grad_norm": 0.5568148406111664,
+      "learning_rate": 0.003,
+      "loss": 4.2106,
+      "step": 1538
+    },
+    {
+      "epoch": 0.01539,
+      "grad_norm": 0.5990162223338897,
+      "learning_rate": 0.003,
+      "loss": 4.2411,
+      "step": 1539
+    },
+    {
+      "epoch": 0.0154,
+      "grad_norm": 0.6709932169117396,
+      "learning_rate": 0.003,
+      "loss": 4.2471,
+      "step": 1540
+    },
+    {
+      "epoch": 0.01541,
+      "grad_norm": 0.839479307713138,
+      "learning_rate": 0.003,
+      "loss": 4.2333,
+      "step": 1541
+    },
+    {
+      "epoch": 0.01542,
+      "grad_norm": 0.998609935659052,
+      "learning_rate": 0.003,
+      "loss": 4.2723,
+      "step": 1542
+    },
+    {
+      "epoch": 0.01543,
+      "grad_norm": 0.9175226186557726,
+      "learning_rate": 0.003,
+      "loss": 4.2597,
+      "step": 1543
+    },
+    {
+      "epoch": 0.01544,
+      "grad_norm": 0.9698336862541426,
+      "learning_rate": 0.003,
+      "loss": 4.2493,
+      "step": 1544
+    },
+    {
+      "epoch": 0.01545,
+      "grad_norm": 0.8646283897782253,
+      "learning_rate": 0.003,
+      "loss": 4.3133,
+      "step": 1545
+    },
+    {
+      "epoch": 0.01546,
+      "grad_norm": 0.7489694669979721,
+      "learning_rate": 0.003,
+      "loss": 4.2786,
+      "step": 1546
+    },
+    {
+      "epoch": 0.01547,
+      "grad_norm": 0.7346787237600204,
+      "learning_rate": 0.003,
+      "loss": 4.2793,
+      "step": 1547
+    },
+    {
+      "epoch": 0.01548,
+      "grad_norm": 0.7843295792698941,
+      "learning_rate": 0.003,
+      "loss": 4.2451,
+      "step": 1548
+    },
+    {
+      "epoch": 0.01549,
+      "grad_norm": 0.7547938162635378,
+      "learning_rate": 0.003,
+      "loss": 4.2323,
+      "step": 1549
+    },
+    {
+      "epoch": 0.0155,
+      "grad_norm": 0.8033101017591833,
+      "learning_rate": 0.003,
+      "loss": 4.2543,
+      "step": 1550
+    },
+    {
+      "epoch": 0.01551,
+      "grad_norm": 0.784324716245673,
+      "learning_rate": 0.003,
+      "loss": 4.2319,
+      "step": 1551
+    },
+    {
+      "epoch": 0.01552,
+      "grad_norm": 0.6675659235069369,
+      "learning_rate": 0.003,
+      "loss": 4.2485,
+      "step": 1552
+    },
+    {
+      "epoch": 0.01553,
+      "grad_norm": 0.7200584927165973,
+      "learning_rate": 0.003,
+      "loss": 4.2735,
+      "step": 1553
+    },
+    {
+      "epoch": 0.01554,
+      "grad_norm": 0.833889265994982,
+      "learning_rate": 0.003,
+      "loss": 4.259,
+      "step": 1554
+    },
+    {
+      "epoch": 0.01555,
+      "grad_norm": 0.7292713050790204,
+      "learning_rate": 0.003,
+      "loss": 4.2302,
+      "step": 1555
+    },
+    {
+      "epoch": 0.01556,
+      "grad_norm": 0.6334535318603711,
+      "learning_rate": 0.003,
+      "loss": 4.2647,
+      "step": 1556
+    },
+    {
+      "epoch": 0.01557,
+      "grad_norm": 0.6665206815174863,
+      "learning_rate": 0.003,
+      "loss": 4.2396,
+      "step": 1557
+    },
+    {
+      "epoch": 0.01558,
+      "grad_norm": 0.5467983416304218,
+      "learning_rate": 0.003,
+      "loss": 4.2264,
+      "step": 1558
+    },
+    {
+      "epoch": 0.01559,
+      "grad_norm": 0.5303338136727427,
+      "learning_rate": 0.003,
+      "loss": 4.2625,
+      "step": 1559
+    },
+    {
+      "epoch": 0.0156,
+      "grad_norm": 0.5964427896199266,
+      "learning_rate": 0.003,
+      "loss": 4.2526,
+      "step": 1560
+    },
+    {
+      "epoch": 0.01561,
+      "grad_norm": 0.7487394215839274,
+      "learning_rate": 0.003,
+      "loss": 4.2358,
+      "step": 1561
+    },
+    {
+      "epoch": 0.01562,
+      "grad_norm": 0.8937121416966828,
+      "learning_rate": 0.003,
+      "loss": 4.2394,
+      "step": 1562
+    },
+    {
+      "epoch": 0.01563,
+      "grad_norm": 1.2085214855734254,
+      "learning_rate": 0.003,
+      "loss": 4.2602,
+      "step": 1563
+    },
+    {
+      "epoch": 0.01564,
+      "grad_norm": 0.8696563758847599,
+      "learning_rate": 0.003,
+      "loss": 4.284,
+      "step": 1564
+    },
+    {
+      "epoch": 0.01565,
+      "grad_norm": 0.739740990652482,
+      "learning_rate": 0.003,
+      "loss": 4.2566,
+      "step": 1565
+    },
+    {
+      "epoch": 0.01566,
+      "grad_norm": 0.7600676271323985,
+      "learning_rate": 0.003,
+      "loss": 4.2738,
+      "step": 1566
+    },
+    {
+      "epoch": 0.01567,
+      "grad_norm": 0.7222900608043384,
+      "learning_rate": 0.003,
+      "loss": 4.2646,
+      "step": 1567
+    },
+    {
+      "epoch": 0.01568,
+      "grad_norm": 0.7569225436342659,
+      "learning_rate": 0.003,
+      "loss": 4.2466,
+      "step": 1568
+    },
+    {
+      "epoch": 0.01569,
+      "grad_norm": 0.7441561581750152,
+      "learning_rate": 0.003,
+      "loss": 4.252,
+      "step": 1569
+    },
+    {
+      "epoch": 0.0157,
+      "grad_norm": 0.7580218902280629,
+      "learning_rate": 0.003,
+      "loss": 4.2727,
+      "step": 1570
+    },
+    {
+      "epoch": 0.01571,
+      "grad_norm": 0.771634247408187,
+      "learning_rate": 0.003,
+      "loss": 4.2581,
+      "step": 1571
+    },
+    {
+      "epoch": 0.01572,
+      "grad_norm": 0.95958549182467,
+      "learning_rate": 0.003,
+      "loss": 4.2713,
+      "step": 1572
+    },
+    {
+      "epoch": 0.01573,
+      "grad_norm": 0.9029057415025205,
+      "learning_rate": 0.003,
+      "loss": 4.261,
+      "step": 1573
+    },
+    {
+      "epoch": 0.01574,
+      "grad_norm": 0.8044062509450788,
+      "learning_rate": 0.003,
+      "loss": 4.2692,
+      "step": 1574
+    },
+    {
+      "epoch": 0.01575,
+      "grad_norm": 0.7286639057606195,
+      "learning_rate": 0.003,
+      "loss": 4.2305,
+      "step": 1575
+    },
+    {
+      "epoch": 0.01576,
+      "grad_norm": 0.6821195872107877,
+      "learning_rate": 0.003,
+      "loss": 4.2492,
+      "step": 1576
+    },
+    {
+      "epoch": 0.01577,
+      "grad_norm": 0.6033878670810847,
+      "learning_rate": 0.003,
+      "loss": 4.2068,
+      "step": 1577
+    },
+    {
+      "epoch": 0.01578,
+      "grad_norm": 0.519693589168086,
+      "learning_rate": 0.003,
+      "loss": 4.2343,
+      "step": 1578
+    },
+    {
+      "epoch": 0.01579,
+      "grad_norm": 0.46078437812161954,
+      "learning_rate": 0.003,
+      "loss": 4.2271,
+      "step": 1579
+    },
+    {
+      "epoch": 0.0158,
+      "grad_norm": 0.3811995998581025,
+      "learning_rate": 0.003,
+      "loss": 4.2344,
+      "step": 1580
+    },
+    {
+      "epoch": 0.01581,
+      "grad_norm": 0.40258363386782764,
+      "learning_rate": 0.003,
+      "loss": 4.2406,
+      "step": 1581
+    },
+    {
+      "epoch": 0.01582,
+      "grad_norm": 0.4070479328001984,
+      "learning_rate": 0.003,
+      "loss": 4.2222,
+      "step": 1582
+    },
+    {
+      "epoch": 0.01583,
+      "grad_norm": 0.39481584768858025,
+      "learning_rate": 0.003,
+      "loss": 4.2093,
+      "step": 1583
+    },
+    {
+      "epoch": 0.01584,
+      "grad_norm": 0.4212949759278112,
+      "learning_rate": 0.003,
+      "loss": 4.24,
+      "step": 1584
+    },
+    {
+      "epoch": 0.01585,
+      "grad_norm": 0.6375222350508811,
+      "learning_rate": 0.003,
+      "loss": 4.2351,
+      "step": 1585
+    },
+    {
+      "epoch": 0.01586,
+      "grad_norm": 1.1087267261978884,
+      "learning_rate": 0.003,
+      "loss": 4.2389,
+      "step": 1586
+    },
+    {
+      "epoch": 0.01587,
+      "grad_norm": 1.095427209738268,
+      "learning_rate": 0.003,
+      "loss": 4.2576,
+      "step": 1587
+    },
+    {
+      "epoch": 0.01588,
+      "grad_norm": 0.5974585636629688,
+      "learning_rate": 0.003,
+      "loss": 4.2673,
+      "step": 1588
+    },
+    {
+      "epoch": 0.01589,
+      "grad_norm": 0.6434065905263348,
+      "learning_rate": 0.003,
+      "loss": 4.2657,
+      "step": 1589
+    },
+    {
+      "epoch": 0.0159,
+      "grad_norm": 0.5922765275307262,
+      "learning_rate": 0.003,
+      "loss": 4.2507,
+      "step": 1590
+    },
+    {
+      "epoch": 0.01591,
+      "grad_norm": 0.6559622239667369,
+      "learning_rate": 0.003,
+      "loss": 4.2576,
+      "step": 1591
+    },
+    {
+      "epoch": 0.01592,
+      "grad_norm": 0.5876956364208827,
+      "learning_rate": 0.003,
+      "loss": 4.2446,
+      "step": 1592
+    },
+    {
+      "epoch": 0.01593,
+      "grad_norm": 0.4847713630300587,
+      "learning_rate": 0.003,
+      "loss": 4.2428,
+      "step": 1593
+    },
+    {
+      "epoch": 0.01594,
+      "grad_norm": 0.5605434129471829,
+      "learning_rate": 0.003,
+      "loss": 4.2313,
+      "step": 1594
+    },
+    {
+      "epoch": 0.01595,
+      "grad_norm": 0.5718187801259297,
+      "learning_rate": 0.003,
+      "loss": 4.2581,
+      "step": 1595
+    },
+    {
+      "epoch": 0.01596,
+      "grad_norm": 0.620870673073496,
+      "learning_rate": 0.003,
+      "loss": 4.2509,
+      "step": 1596
+    },
+    {
+      "epoch": 0.01597,
+      "grad_norm": 0.6969552121884817,
+      "learning_rate": 0.003,
+      "loss": 4.2154,
+      "step": 1597
+    },
+    {
+      "epoch": 0.01598,
+      "grad_norm": 0.8094238052393485,
+      "learning_rate": 0.003,
+      "loss": 4.2172,
+      "step": 1598
+    },
+    {
+      "epoch": 0.01599,
+      "grad_norm": 0.7713054312041273,
+      "learning_rate": 0.003,
+      "loss": 4.2333,
+      "step": 1599
+    },
+    {
+      "epoch": 0.016,
+      "grad_norm": 0.7787996781195005,
+      "learning_rate": 0.003,
+      "loss": 4.2062,
+      "step": 1600
+    },
+    {
+      "epoch": 0.01601,
+      "grad_norm": 0.620719741277366,
+      "learning_rate": 0.003,
+      "loss": 4.2478,
+      "step": 1601
+    },
+    {
+      "epoch": 0.01602,
+      "grad_norm": 0.5965590372558249,
+      "learning_rate": 0.003,
+      "loss": 4.223,
+      "step": 1602
+    },
+    {
+      "epoch": 0.01603,
+      "grad_norm": 0.6057948188618382,
+      "learning_rate": 0.003,
+      "loss": 4.2386,
+      "step": 1603
+    },
+    {
+      "epoch": 0.01604,
+      "grad_norm": 0.5859806802733505,
+      "learning_rate": 0.003,
+      "loss": 4.2313,
+      "step": 1604
+    },
+    {
+      "epoch": 0.01605,
+      "grad_norm": 0.5985785202970987,
+      "learning_rate": 0.003,
+      "loss": 4.2093,
+      "step": 1605
+    },
+    {
+      "epoch": 0.01606,
+      "grad_norm": 0.6329923375985832,
+      "learning_rate": 0.003,
+      "loss": 4.2131,
+      "step": 1606
+    },
+    {
+      "epoch": 0.01607,
+      "grad_norm": 0.6777814987430973,
+      "learning_rate": 0.003,
+      "loss": 4.2418,
+      "step": 1607
+    },
+    {
+      "epoch": 0.01608,
+      "grad_norm": 0.6454825679602344,
+      "learning_rate": 0.003,
+      "loss": 4.2152,
+      "step": 1608
+    },
+    {
+      "epoch": 0.01609,
+      "grad_norm": 0.6234331303306465,
+      "learning_rate": 0.003,
+      "loss": 4.222,
+      "step": 1609
+    },
+    {
+      "epoch": 0.0161,
+      "grad_norm": 0.717661852780552,
+      "learning_rate": 0.003,
+      "loss": 4.1917,
+      "step": 1610
+    },
+    {
+      "epoch": 0.01611,
+      "grad_norm": 0.814514408732785,
+      "learning_rate": 0.003,
+      "loss": 4.2133,
+      "step": 1611
+    },
+    {
+      "epoch": 0.01612,
+      "grad_norm": 0.968378243767811,
+      "learning_rate": 0.003,
+      "loss": 4.2184,
+      "step": 1612
+    },
+    {
+      "epoch": 0.01613,
+      "grad_norm": 1.1889382213143287,
+      "learning_rate": 0.003,
+      "loss": 4.2747,
+      "step": 1613
+    },
+    {
+      "epoch": 0.01614,
+      "grad_norm": 0.902755449051569,
+      "learning_rate": 0.003,
+      "loss": 4.2894,
+      "step": 1614
+    },
+    {
+      "epoch": 0.01615,
+      "grad_norm": 0.8743919537144361,
+      "learning_rate": 0.003,
+      "loss": 4.2622,
+      "step": 1615
+    },
+    {
+      "epoch": 0.01616,
+      "grad_norm": 0.8601657492637806,
+      "learning_rate": 0.003,
+      "loss": 4.2396,
+      "step": 1616
+    },
+    {
+      "epoch": 0.01617,
+      "grad_norm": 0.8301577875616364,
+      "learning_rate": 0.003,
+      "loss": 4.2333,
+      "step": 1617
+    },
+    {
+      "epoch": 0.01618,
+      "grad_norm": 0.829332785699801,
+      "learning_rate": 0.003,
+      "loss": 4.2387,
+      "step": 1618
+    },
+    {
+      "epoch": 0.01619,
+      "grad_norm": 0.7047548182139096,
+      "learning_rate": 0.003,
+      "loss": 4.2209,
+      "step": 1619
+    },
+    {
+      "epoch": 0.0162,
+      "grad_norm": 0.7270289420377364,
+      "learning_rate": 0.003,
+      "loss": 4.2317,
+      "step": 1620
+    },
+    {
+      "epoch": 0.01621,
+      "grad_norm": 0.6929887184456018,
+      "learning_rate": 0.003,
+      "loss": 4.2257,
+      "step": 1621
+    },
+    {
+      "epoch": 0.01622,
+      "grad_norm": 0.7471865137769718,
+      "learning_rate": 0.003,
+      "loss": 4.2589,
+      "step": 1622
+    },
+    {
+      "epoch": 0.01623,
+      "grad_norm": 0.8282860007733814,
+      "learning_rate": 0.003,
+      "loss": 4.2399,
+      "step": 1623
+    },
+    {
+      "epoch": 0.01624,
+      "grad_norm": 0.8871911840665138,
+      "learning_rate": 0.003,
+      "loss": 4.2581,
+      "step": 1624
+    },
+    {
+      "epoch": 0.01625,
+      "grad_norm": 1.0259944995678087,
+      "learning_rate": 0.003,
+      "loss": 4.2469,
+      "step": 1625
+    },
+    {
+      "epoch": 0.01626,
+      "grad_norm": 0.937815191301713,
+      "learning_rate": 0.003,
+      "loss": 4.2602,
+      "step": 1626
+    },
+    {
+      "epoch": 0.01627,
+      "grad_norm": 1.064022068457424,
+      "learning_rate": 0.003,
+      "loss": 4.2368,
+      "step": 1627
+    },
+    {
+      "epoch": 0.01628,
+      "grad_norm": 1.0710330182336567,
+      "learning_rate": 0.003,
+      "loss": 4.2567,
+      "step": 1628
+    },
+    {
+      "epoch": 0.01629,
+      "grad_norm": 0.7877325862421115,
+      "learning_rate": 0.003,
+      "loss": 4.2414,
+      "step": 1629
+    },
+    {
+      "epoch": 0.0163,
+      "grad_norm": 0.6743454531988056,
+      "learning_rate": 0.003,
+      "loss": 4.2288,
+      "step": 1630
+    },
+    {
+      "epoch": 0.01631,
+      "grad_norm": 0.701073161255015,
+      "learning_rate": 0.003,
+      "loss": 4.2499,
+      "step": 1631
+    },
+    {
+      "epoch": 0.01632,
+      "grad_norm": 0.6004677978717051,
+      "learning_rate": 0.003,
+      "loss": 4.252,
+      "step": 1632
+    },
+    {
+      "epoch": 0.01633,
+      "grad_norm": 0.5772521307266697,
+      "learning_rate": 0.003,
+      "loss": 4.2255,
+      "step": 1633
+    },
+    {
+      "epoch": 0.01634,
+      "grad_norm": 0.5454153935480286,
+      "learning_rate": 0.003,
+      "loss": 4.2406,
+      "step": 1634
+    },
+    {
+      "epoch": 0.01635,
+      "grad_norm": 0.5154455969479952,
+      "learning_rate": 0.003,
+      "loss": 4.218,
+      "step": 1635
+    },
+    {
+      "epoch": 0.01636,
+      "grad_norm": 0.5267549110628605,
+      "learning_rate": 0.003,
+      "loss": 4.2348,
+      "step": 1636
+    },
+    {
+      "epoch": 0.01637,
+      "grad_norm": 0.5971697216641335,
+      "learning_rate": 0.003,
+      "loss": 4.2205,
+      "step": 1637
+    },
+    {
+      "epoch": 0.01638,
+      "grad_norm": 0.7211767188224439,
+      "learning_rate": 0.003,
+      "loss": 4.229,
+      "step": 1638
+    },
+    {
+      "epoch": 0.01639,
+      "grad_norm": 0.7816125211534669,
+      "learning_rate": 0.003,
+      "loss": 4.2548,
+      "step": 1639
+    },
+    {
+      "epoch": 0.0164,
+      "grad_norm": 0.7389027343982134,
+      "learning_rate": 0.003,
+      "loss": 4.2362,
+      "step": 1640
+    },
+    {
+      "epoch": 0.01641,
+      "grad_norm": 0.5975265072148448,
+      "learning_rate": 0.003,
+      "loss": 4.2026,
+      "step": 1641
+    },
+    {
+      "epoch": 0.01642,
+      "grad_norm": 0.7083476744478381,
+      "learning_rate": 0.003,
+      "loss": 4.2507,
+      "step": 1642
+    },
+    {
+      "epoch": 0.01643,
+      "grad_norm": 0.7791000270261257,
+      "learning_rate": 0.003,
+      "loss": 4.2378,
+      "step": 1643
+    },
+    {
+      "epoch": 0.01644,
+      "grad_norm": 0.9006394984546569,
+      "learning_rate": 0.003,
+      "loss": 4.2625,
+      "step": 1644
+    },
+    {
+      "epoch": 0.01645,
+      "grad_norm": 1.2334686487745912,
+      "learning_rate": 0.003,
+      "loss": 4.2236,
+      "step": 1645
+    },
+    {
+      "epoch": 0.01646,
+      "grad_norm": 0.9932721101220016,
+      "learning_rate": 0.003,
+      "loss": 4.263,
+      "step": 1646
+    },
+    {
+      "epoch": 0.01647,
+      "grad_norm": 0.989511986212608,
+      "learning_rate": 0.003,
+      "loss": 4.2576,
+      "step": 1647
+    },
+    {
+      "epoch": 0.01648,
+      "grad_norm": 0.8142750385583939,
+      "learning_rate": 0.003,
+      "loss": 4.23,
+      "step": 1648
+    },
+    {
+      "epoch": 0.01649,
+      "grad_norm": 0.6967651791258922,
+      "learning_rate": 0.003,
+      "loss": 4.2583,
+      "step": 1649
+    },
+    {
+      "epoch": 0.0165,
+      "grad_norm": 0.7348292229935104,
+      "learning_rate": 0.003,
+      "loss": 4.2361,
+      "step": 1650
+    },
+    {
+      "epoch": 0.01651,
+      "grad_norm": 0.6565673319405235,
+      "learning_rate": 0.003,
+      "loss": 4.2402,
+      "step": 1651
+    },
+    {
+      "epoch": 0.01652,
+      "grad_norm": 0.5674400404701213,
+      "learning_rate": 0.003,
+      "loss": 4.1936,
+      "step": 1652
+    },
+    {
+      "epoch": 0.01653,
+      "grad_norm": 0.5571261023539824,
+      "learning_rate": 0.003,
+      "loss": 4.2378,
+      "step": 1653
+    },
+    {
+      "epoch": 0.01654,
+      "grad_norm": 0.5083863828402352,
+      "learning_rate": 0.003,
+      "loss": 4.2229,
+      "step": 1654
+    },
+    {
+      "epoch": 0.01655,
+      "grad_norm": 0.49820131551804164,
+      "learning_rate": 0.003,
+      "loss": 4.23,
+      "step": 1655
+    },
+    {
+      "epoch": 0.01656,
+      "grad_norm": 0.5201001189396881,
+      "learning_rate": 0.003,
+      "loss": 4.2012,
+      "step": 1656
+    },
+    {
+      "epoch": 0.01657,
+      "grad_norm": 0.6652455003065759,
+      "learning_rate": 0.003,
+      "loss": 4.2237,
+      "step": 1657
+    },
+    {
+      "epoch": 0.01658,
+      "grad_norm": 0.8870121051704852,
+      "learning_rate": 0.003,
+      "loss": 4.219,
+      "step": 1658
+    },
+    {
+      "epoch": 0.01659,
+      "grad_norm": 1.1634698909054708,
+      "learning_rate": 0.003,
+      "loss": 4.2361,
+      "step": 1659
+    },
+    {
+      "epoch": 0.0166,
+      "grad_norm": 0.6579478278546654,
+      "learning_rate": 0.003,
+      "loss": 4.2096,
+      "step": 1660
+    },
+    {
+      "epoch": 0.01661,
+      "grad_norm": 0.5917680182889076,
+      "learning_rate": 0.003,
+      "loss": 4.2092,
+      "step": 1661
+    },
+    {
+      "epoch": 0.01662,
+      "grad_norm": 0.7821000738049482,
+      "learning_rate": 0.003,
+      "loss": 4.2171,
+      "step": 1662
+    },
+    {
+      "epoch": 0.01663,
+      "grad_norm": 0.812312164693218,
+      "learning_rate": 0.003,
+      "loss": 4.2422,
+      "step": 1663
+    },
+    {
+      "epoch": 0.01664,
+      "grad_norm": 0.7053733462479915,
+      "learning_rate": 0.003,
+      "loss": 4.2567,
+      "step": 1664
+    },
+    {
+      "epoch": 0.01665,
+      "grad_norm": 0.713897124959619,
+      "learning_rate": 0.003,
+      "loss": 4.2198,
+      "step": 1665
+    },
+    {
+      "epoch": 0.01666,
+      "grad_norm": 0.6600299709203347,
+      "learning_rate": 0.003,
+      "loss": 4.2205,
+      "step": 1666
+    },
+    {
+      "epoch": 0.01667,
+      "grad_norm": 0.5832085680202569,
+      "learning_rate": 0.003,
+      "loss": 4.2321,
+      "step": 1667
+    },
+    {
+      "epoch": 0.01668,
+      "grad_norm": 0.6589799435966834,
+      "learning_rate": 0.003,
+      "loss": 4.2397,
+      "step": 1668
+    },
+    {
+      "epoch": 0.01669,
+      "grad_norm": 0.7719970182316414,
+      "learning_rate": 0.003,
+      "loss": 4.2273,
+      "step": 1669
+    },
+    {
+      "epoch": 0.0167,
+      "grad_norm": 0.7846205843605817,
+      "learning_rate": 0.003,
+      "loss": 4.2184,
+      "step": 1670
+    },
+    {
+      "epoch": 0.01671,
+      "grad_norm": 0.7635694428992538,
+      "learning_rate": 0.003,
+      "loss": 4.2087,
+      "step": 1671
+    },
+    {
+      "epoch": 0.01672,
+      "grad_norm": 0.7758449881926132,
+      "learning_rate": 0.003,
+      "loss": 4.2199,
+      "step": 1672
+    },
+    {
+      "epoch": 0.01673,
+      "grad_norm": 0.6618290355489082,
+      "learning_rate": 0.003,
+      "loss": 4.2134,
+      "step": 1673
+    },
+    {
+      "epoch": 0.01674,
+      "grad_norm": 0.6727466502726199,
+      "learning_rate": 0.003,
+      "loss": 4.232,
+      "step": 1674
+    },
+    {
+      "epoch": 0.01675,
+      "grad_norm": 0.6690360284678838,
+      "learning_rate": 0.003,
+      "loss": 4.2158,
+      "step": 1675
+    },
+    {
+      "epoch": 0.01676,
+      "grad_norm": 0.6672382539021496,
+      "learning_rate": 0.003,
+      "loss": 4.2242,
+      "step": 1676
+    },
+    {
+      "epoch": 0.01677,
+      "grad_norm": 0.6311361325143124,
+      "learning_rate": 0.003,
+      "loss": 4.2132,
+      "step": 1677
+    },
+    {
+      "epoch": 0.01678,
+      "grad_norm": 0.727198962494881,
+      "learning_rate": 0.003,
+      "loss": 4.2299,
+      "step": 1678
+    },
+    {
+      "epoch": 0.01679,
+      "grad_norm": 0.8433802333808034,
+      "learning_rate": 0.003,
+      "loss": 4.2177,
+      "step": 1679
+    },
+    {
+      "epoch": 0.0168,
+      "grad_norm": 0.8402151525100402,
+      "learning_rate": 0.003,
+      "loss": 4.2352,
+      "step": 1680
+    },
+    {
+      "epoch": 0.01681,
+      "grad_norm": 0.9608253642789906,
+      "learning_rate": 0.003,
+      "loss": 4.2433,
+      "step": 1681
+    },
+    {
+      "epoch": 0.01682,
+      "grad_norm": 1.160024891342376,
+      "learning_rate": 0.003,
+      "loss": 4.246,
+      "step": 1682
+    },
+    {
+      "epoch": 0.01683,
+      "grad_norm": 0.8201452744681862,
+      "learning_rate": 0.003,
+      "loss": 4.2539,
+      "step": 1683
+    },
+    {
+      "epoch": 0.01684,
+      "grad_norm": 0.7434576192749681,
+      "learning_rate": 0.003,
+      "loss": 4.2242,
+      "step": 1684
+    },
+    {
+      "epoch": 0.01685,
+      "grad_norm": 0.8733167238677911,
+      "learning_rate": 0.003,
+      "loss": 4.2448,
+      "step": 1685
+    },
+    {
+      "epoch": 0.01686,
+      "grad_norm": 0.7251849705005492,
+      "learning_rate": 0.003,
+      "loss": 4.2342,
+      "step": 1686
+    },
+    {
+      "epoch": 0.01687,
+      "grad_norm": 0.623355393019925,
+      "learning_rate": 0.003,
+      "loss": 4.2258,
+      "step": 1687
+    },
+    {
+      "epoch": 0.01688,
+      "grad_norm": 0.6485004834811148,
+      "learning_rate": 0.003,
+      "loss": 4.2113,
+      "step": 1688
+    },
+    {
+      "epoch": 0.01689,
+      "grad_norm": 0.6622097332232696,
+      "learning_rate": 0.003,
+      "loss": 4.2246,
+      "step": 1689
+    },
+    {
+      "epoch": 0.0169,
+      "grad_norm": 0.5609920955895745,
+      "learning_rate": 0.003,
+      "loss": 4.2095,
+      "step": 1690
+    },
+    {
+      "epoch": 0.01691,
+      "grad_norm": 0.5554950872814335,
+      "learning_rate": 0.003,
+      "loss": 4.2208,
+      "step": 1691
+    },
+    {
+      "epoch": 0.01692,
+      "grad_norm": 0.578754263387868,
+      "learning_rate": 0.003,
+      "loss": 4.2223,
+      "step": 1692
+    },
+    {
+      "epoch": 0.01693,
+      "grad_norm": 0.54763155586202,
+      "learning_rate": 0.003,
+      "loss": 4.2453,
+      "step": 1693
+    },
+    {
+      "epoch": 0.01694,
+      "grad_norm": 0.5925337152955293,
+      "learning_rate": 0.003,
+      "loss": 4.1963,
+      "step": 1694
+    },
+    {
+      "epoch": 0.01695,
+      "grad_norm": 0.5883008115307695,
+      "learning_rate": 0.003,
+      "loss": 4.2285,
+      "step": 1695
+    },
+    {
+      "epoch": 0.01696,
+      "grad_norm": 0.5777962347479031,
+      "learning_rate": 0.003,
+      "loss": 4.2174,
+      "step": 1696
+    },
+    {
+      "epoch": 0.01697,
+      "grad_norm": 0.6137264663560853,
+      "learning_rate": 0.003,
+      "loss": 4.1953,
+      "step": 1697
+    },
+    {
+      "epoch": 0.01698,
+      "grad_norm": 0.5567779485618047,
+      "learning_rate": 0.003,
+      "loss": 4.1887,
+      "step": 1698
+    },
+    {
+      "epoch": 0.01699,
+      "grad_norm": 0.563692648440794,
+      "learning_rate": 0.003,
+      "loss": 4.1995,
+      "step": 1699
+    },
+    {
+      "epoch": 0.017,
+      "grad_norm": 0.5378862525995165,
+      "learning_rate": 0.003,
+      "loss": 4.2165,
+      "step": 1700
+    },
+    {
+      "epoch": 0.01701,
+      "grad_norm": 0.5131924880718308,
+      "learning_rate": 0.003,
+      "loss": 4.1973,
+      "step": 1701
+    },
+    {
+      "epoch": 0.01702,
+      "grad_norm": 0.5473378733884345,
+      "learning_rate": 0.003,
+      "loss": 4.2207,
+      "step": 1702
+    },
+    {
+      "epoch": 0.01703,
+      "grad_norm": 0.659714976195776,
+      "learning_rate": 0.003,
+      "loss": 4.2099,
+      "step": 1703
+    },
+    {
+      "epoch": 0.01704,
+      "grad_norm": 0.7407833108135735,
+      "learning_rate": 0.003,
+      "loss": 4.2001,
+      "step": 1704
+    },
+    {
+      "epoch": 0.01705,
+      "grad_norm": 0.7567928276503215,
+      "learning_rate": 0.003,
+      "loss": 4.2073,
+      "step": 1705
+    },
+    {
+      "epoch": 0.01706,
+      "grad_norm": 0.7694420146818104,
+      "learning_rate": 0.003,
+      "loss": 4.2055,
+      "step": 1706
+    },
+    {
+      "epoch": 0.01707,
+      "grad_norm": 1.0045884849963176,
+      "learning_rate": 0.003,
+      "loss": 4.2374,
+      "step": 1707
+    },
+    {
+      "epoch": 0.01708,
+      "grad_norm": 1.2575213437630939,
+      "learning_rate": 0.003,
+      "loss": 4.2465,
+      "step": 1708
+    },
+    {
+      "epoch": 0.01709,
+      "grad_norm": 0.848642979076845,
+      "learning_rate": 0.003,
+      "loss": 4.2418,
+      "step": 1709
+    },
+    {
+      "epoch": 0.0171,
+      "grad_norm": 0.7781603648787689,
+      "learning_rate": 0.003,
+      "loss": 4.1936,
+      "step": 1710
+    },
+    {
+      "epoch": 0.01711,
+      "grad_norm": 0.7996923845242195,
+      "learning_rate": 0.003,
+      "loss": 4.2245,
+      "step": 1711
+    },
+    {
+      "epoch": 0.01712,
+      "grad_norm": 0.7302785582736923,
+      "learning_rate": 0.003,
+      "loss": 4.2298,
+      "step": 1712
+    },
+    {
+      "epoch": 0.01713,
+      "grad_norm": 0.7495189068505729,
+      "learning_rate": 0.003,
+      "loss": 4.204,
+      "step": 1713
+    },
+    {
+      "epoch": 0.01714,
+      "grad_norm": 0.68320470368433,
+      "learning_rate": 0.003,
+      "loss": 4.2407,
+      "step": 1714
+    },
+    {
+      "epoch": 0.01715,
+      "grad_norm": 0.744728870764733,
+      "learning_rate": 0.003,
+      "loss": 4.2481,
+      "step": 1715
+    },
+    {
+      "epoch": 0.01716,
+      "grad_norm": 0.7728817520220844,
+      "learning_rate": 0.003,
+      "loss": 4.2317,
+      "step": 1716
+    },
+    {
+      "epoch": 0.01717,
+      "grad_norm": 0.8671487727140617,
+      "learning_rate": 0.003,
+      "loss": 4.2664,
+      "step": 1717
+    },
+    {
+      "epoch": 0.01718,
+      "grad_norm": 0.8286537250448458,
+      "learning_rate": 0.003,
+      "loss": 4.237,
+      "step": 1718
+    },
+    {
+      "epoch": 0.01719,
+      "grad_norm": 0.7719901017400573,
+      "learning_rate": 0.003,
+      "loss": 4.2535,
+      "step": 1719
+    },
+    {
+      "epoch": 0.0172,
+      "grad_norm": 0.7363513590865703,
+      "learning_rate": 0.003,
+      "loss": 4.2105,
+      "step": 1720
+    },
+    {
+      "epoch": 0.01721,
+      "grad_norm": 0.6255496683743654,
+      "learning_rate": 0.003,
+      "loss": 4.2258,
+      "step": 1721
+    },
+    {
+      "epoch": 0.01722,
+      "grad_norm": 0.5928702135655549,
+      "learning_rate": 0.003,
+      "loss": 4.2129,
+      "step": 1722
+    },
+    {
+      "epoch": 0.01723,
+      "grad_norm": 0.6570038276937221,
+      "learning_rate": 0.003,
+      "loss": 4.2248,
+      "step": 1723
+    },
+    {
+      "epoch": 0.01724,
+      "grad_norm": 0.6380856228410711,
+      "learning_rate": 0.003,
+      "loss": 4.2241,
+      "step": 1724
+    },
+    {
+      "epoch": 0.01725,
+      "grad_norm": 0.7477214539729845,
+      "learning_rate": 0.003,
+      "loss": 4.2088,
+      "step": 1725
+    },
+    {
+      "epoch": 0.01726,
+      "grad_norm": 0.7064643502935442,
+      "learning_rate": 0.003,
+      "loss": 4.2027,
+      "step": 1726
+    },
+    {
+      "epoch": 0.01727,
+      "grad_norm": 0.7018895227983273,
+      "learning_rate": 0.003,
+      "loss": 4.2082,
+      "step": 1727
+    },
+    {
+      "epoch": 0.01728,
+      "grad_norm": 0.7268534843228023,
+      "learning_rate": 0.003,
+      "loss": 4.2116,
+      "step": 1728
+    },
+    {
+      "epoch": 0.01729,
+      "grad_norm": 0.8265185231394759,
+      "learning_rate": 0.003,
+      "loss": 4.2226,
+      "step": 1729
+    },
+    {
+      "epoch": 0.0173,
+      "grad_norm": 0.9216458529069881,
+      "learning_rate": 0.003,
+      "loss": 4.2023,
+      "step": 1730
+    },
+    {
+      "epoch": 0.01731,
+      "grad_norm": 1.0984877424905655,
+      "learning_rate": 0.003,
+      "loss": 4.2294,
+      "step": 1731
+    },
+    {
+      "epoch": 0.01732,
+      "grad_norm": 0.9457932448227158,
+      "learning_rate": 0.003,
+      "loss": 4.2417,
+      "step": 1732
+    },
+    {
+      "epoch": 0.01733,
+      "grad_norm": 0.8744510824253744,
+      "learning_rate": 0.003,
+      "loss": 4.2124,
+      "step": 1733
+    },
+    {
+      "epoch": 0.01734,
+      "grad_norm": 0.8464357668562363,
+      "learning_rate": 0.003,
+      "loss": 4.2277,
+      "step": 1734
+    },
+    {
+      "epoch": 0.01735,
+      "grad_norm": 0.7933851274075868,
+      "learning_rate": 0.003,
+      "loss": 4.1993,
+      "step": 1735
+    },
+    {
+      "epoch": 0.01736,
+      "grad_norm": 0.7800431566899245,
+      "learning_rate": 0.003,
+      "loss": 4.2002,
+      "step": 1736
+    },
+    {
+      "epoch": 0.01737,
+      "grad_norm": 0.7049958430065779,
+      "learning_rate": 0.003,
+      "loss": 4.2254,
+      "step": 1737
+    },
+    {
+      "epoch": 0.01738,
+      "grad_norm": 0.7322441940210889,
+      "learning_rate": 0.003,
+      "loss": 4.2135,
+      "step": 1738
+    },
+    {
+      "epoch": 0.01739,
+      "grad_norm": 0.7327690195119888,
+      "learning_rate": 0.003,
+      "loss": 4.2297,
+      "step": 1739
+    },
+    {
+      "epoch": 0.0174,
+      "grad_norm": 0.7530152956649067,
+      "learning_rate": 0.003,
+      "loss": 4.231,
+      "step": 1740
+    },
+    {
+      "epoch": 0.01741,
+      "grad_norm": 0.7681419574064243,
+      "learning_rate": 0.003,
+      "loss": 4.2369,
+      "step": 1741
+    },
+    {
+      "epoch": 0.01742,
+      "grad_norm": 0.8179621872439043,
+      "learning_rate": 0.003,
+      "loss": 4.2278,
+      "step": 1742
+    },
+    {
+      "epoch": 0.01743,
+      "grad_norm": 0.9578047731817773,
+      "learning_rate": 0.003,
+      "loss": 4.2432,
+      "step": 1743
+    },
+    {
+      "epoch": 0.01744,
+      "grad_norm": 1.0853544694502002,
+      "learning_rate": 0.003,
+      "loss": 4.2354,
+      "step": 1744
+    },
+    {
+      "epoch": 0.01745,
+      "grad_norm": 0.901300254011202,
+      "learning_rate": 0.003,
+      "loss": 4.1985,
+      "step": 1745
+    },
+    {
+      "epoch": 0.01746,
+      "grad_norm": 0.7977925606337166,
+      "learning_rate": 0.003,
+      "loss": 4.2177,
+      "step": 1746
+    },
+    {
+      "epoch": 0.01747,
+      "grad_norm": 0.7805003864858971,
+      "learning_rate": 0.003,
+      "loss": 4.2153,
+      "step": 1747
+    },
+    {
+      "epoch": 0.01748,
+      "grad_norm": 0.5958068748685724,
+      "learning_rate": 0.003,
+      "loss": 4.2004,
+      "step": 1748
+    },
+    {
+      "epoch": 0.01749,
+      "grad_norm": 0.5609955740586448,
+      "learning_rate": 0.003,
+      "loss": 4.1969,
+      "step": 1749
+    },
+    {
+      "epoch": 0.0175,
+      "grad_norm": 0.5677742109277946,
+      "learning_rate": 0.003,
+      "loss": 4.2121,
+      "step": 1750
+    },
+    {
+      "epoch": 0.01751,
+      "grad_norm": 0.5067010918467684,
+      "learning_rate": 0.003,
+      "loss": 4.2009,
+      "step": 1751
+    },
+    {
+      "epoch": 0.01752,
+      "grad_norm": 0.4469462107071404,
+      "learning_rate": 0.003,
+      "loss": 4.239,
+      "step": 1752
+    },
+    {
+      "epoch": 0.01753,
+      "grad_norm": 0.4299529922000094,
+      "learning_rate": 0.003,
+      "loss": 4.1859,
+      "step": 1753
+    },
+    {
+      "epoch": 0.01754,
+      "grad_norm": 0.39327472210537173,
+      "learning_rate": 0.003,
+      "loss": 4.2141,
+      "step": 1754
+    },
+    {
+      "epoch": 0.01755,
+      "grad_norm": 0.4833428525390586,
+      "learning_rate": 0.003,
+      "loss": 4.1582,
+      "step": 1755
+    },
+    {
+      "epoch": 0.01756,
+      "grad_norm": 0.5692339112673285,
+      "learning_rate": 0.003,
+      "loss": 4.2351,
+      "step": 1756
+    },
+    {
+      "epoch": 0.01757,
+      "grad_norm": 0.765825485079122,
+      "learning_rate": 0.003,
+      "loss": 4.2204,
+      "step": 1757
+    },
+    {
+      "epoch": 0.01758,
+      "grad_norm": 0.9042836101782444,
+      "learning_rate": 0.003,
+      "loss": 4.2384,
+      "step": 1758
+    },
+    {
+      "epoch": 0.01759,
+      "grad_norm": 0.8742540565152447,
+      "learning_rate": 0.003,
+      "loss": 4.2064,
+      "step": 1759
+    },
+    {
+      "epoch": 0.0176,
+      "grad_norm": 0.7243758331092184,
+      "learning_rate": 0.003,
+      "loss": 4.2193,
+      "step": 1760
+    },
+    {
+      "epoch": 0.01761,
+      "grad_norm": 0.6848453720193391,
+      "learning_rate": 0.003,
+      "loss": 4.2074,
+      "step": 1761
+    },
+    {
+      "epoch": 0.01762,
+      "grad_norm": 0.8704455967545811,
+      "learning_rate": 0.003,
+      "loss": 4.2328,
+      "step": 1762
+    },
+    {
+      "epoch": 0.01763,
+      "grad_norm": 0.8471557315960334,
+      "learning_rate": 0.003,
+      "loss": 4.2246,
+      "step": 1763
+    },
+    {
+      "epoch": 0.01764,
+      "grad_norm": 0.7827279567320375,
+      "learning_rate": 0.003,
+      "loss": 4.1921,
+      "step": 1764
+    },
+    {
+      "epoch": 0.01765,
+      "grad_norm": 0.8790545608575575,
+      "learning_rate": 0.003,
+      "loss": 4.2237,
+      "step": 1765
+    },
+    {
+      "epoch": 0.01766,
+      "grad_norm": 0.763900084595892,
+      "learning_rate": 0.003,
+      "loss": 4.2322,
+      "step": 1766
+    },
+    {
+      "epoch": 0.01767,
+      "grad_norm": 0.7449808930716393,
+      "learning_rate": 0.003,
+      "loss": 4.1962,
+      "step": 1767
+    },
+    {
+      "epoch": 0.01768,
+      "grad_norm": 0.6931774004435453,
+      "learning_rate": 0.003,
+      "loss": 4.2017,
+      "step": 1768
+    },
+    {
+      "epoch": 0.01769,
+      "grad_norm": 0.7426550774463054,
+      "learning_rate": 0.003,
+      "loss": 4.2066,
+      "step": 1769
+    },
+    {
+      "epoch": 0.0177,
+      "grad_norm": 0.7834040480889046,
+      "learning_rate": 0.003,
+      "loss": 4.1901,
+      "step": 1770
+    },
+    {
+      "epoch": 0.01771,
+      "grad_norm": 0.6750175361724853,
+      "learning_rate": 0.003,
+      "loss": 4.2195,
+      "step": 1771
+    },
+    {
+      "epoch": 0.01772,
+      "grad_norm": 0.6376494933849937,
+      "learning_rate": 0.003,
+      "loss": 4.1888,
+      "step": 1772
+    },
+    {
+      "epoch": 0.01773,
+      "grad_norm": 0.6192068250431033,
+      "learning_rate": 0.003,
+      "loss": 4.1807,
+      "step": 1773
+    },
+    {
+      "epoch": 0.01774,
+      "grad_norm": 0.6106095483716042,
+      "learning_rate": 0.003,
+      "loss": 4.2034,
+      "step": 1774
+    },
+    {
+      "epoch": 0.01775,
+      "grad_norm": 0.6196510426340663,
+      "learning_rate": 0.003,
+      "loss": 4.1895,
+      "step": 1775
+    },
+    {
+      "epoch": 0.01776,
+      "grad_norm": 0.7097137120766858,
+      "learning_rate": 0.003,
+      "loss": 4.1828,
+      "step": 1776
+    },
+    {
+      "epoch": 0.01777,
+      "grad_norm": 0.7063319061887237,
+      "learning_rate": 0.003,
+      "loss": 4.2044,
+      "step": 1777
+    },
+    {
+      "epoch": 0.01778,
+      "grad_norm": 0.629621456935323,
+      "learning_rate": 0.003,
+      "loss": 4.194,
+      "step": 1778
+    },
+    {
+      "epoch": 0.01779,
+      "grad_norm": 0.7088930396533265,
+      "learning_rate": 0.003,
+      "loss": 4.215,
+      "step": 1779
+    },
+    {
+      "epoch": 0.0178,
+      "grad_norm": 0.7253919374521124,
+      "learning_rate": 0.003,
+      "loss": 4.191,
+      "step": 1780
+    },
+    {
+      "epoch": 0.01781,
+      "grad_norm": 0.7852702442407208,
+      "learning_rate": 0.003,
+      "loss": 4.2044,
+      "step": 1781
+    },
+    {
+      "epoch": 0.01782,
+      "grad_norm": 0.8394755648372281,
+      "learning_rate": 0.003,
+      "loss": 4.2328,
+      "step": 1782
+    },
+    {
+      "epoch": 0.01783,
+      "grad_norm": 0.6910931858922343,
+      "learning_rate": 0.003,
+      "loss": 4.2068,
+      "step": 1783
+    },
+    {
+      "epoch": 0.01784,
+      "grad_norm": 0.7674520462715927,
+      "learning_rate": 0.003,
+      "loss": 4.2107,
+      "step": 1784
+    },
+    {
+      "epoch": 0.01785,
+      "grad_norm": 0.8199387977488617,
+      "learning_rate": 0.003,
+      "loss": 4.1741,
+      "step": 1785
+    },
+    {
+      "epoch": 0.01786,
+      "grad_norm": 0.9173162484249777,
+      "learning_rate": 0.003,
+      "loss": 4.2074,
+      "step": 1786
+    },
+    {
+      "epoch": 0.01787,
+      "grad_norm": 1.334253988039247,
+      "learning_rate": 0.003,
+      "loss": 4.2336,
+      "step": 1787
+    },
+    {
+      "epoch": 0.01788,
+      "grad_norm": 1.0880162732024794,
+      "learning_rate": 0.003,
+      "loss": 4.2464,
+      "step": 1788
+    },
+    {
+      "epoch": 0.01789,
+      "grad_norm": 0.8768448988019708,
+      "learning_rate": 0.003,
+      "loss": 4.2114,
+      "step": 1789
+    },
+    {
+      "epoch": 0.0179,
+      "grad_norm": 0.9478883510444388,
+      "learning_rate": 0.003,
+      "loss": 4.2362,
+      "step": 1790
+    },
+    {
+      "epoch": 0.01791,
+      "grad_norm": 0.9789788338963192,
+      "learning_rate": 0.003,
+      "loss": 4.2475,
+      "step": 1791
+    },
+    {
+      "epoch": 0.01792,
+      "grad_norm": 0.9379061880413183,
+      "learning_rate": 0.003,
+      "loss": 4.2484,
+      "step": 1792
+    },
+    {
+      "epoch": 0.01793,
+      "grad_norm": 0.9327271640213346,
+      "learning_rate": 0.003,
+      "loss": 4.215,
+      "step": 1793
+    },
+    {
+      "epoch": 0.01794,
+      "grad_norm": 0.7690204695866035,
+      "learning_rate": 0.003,
+      "loss": 4.2222,
+      "step": 1794
+    },
+    {
+      "epoch": 0.01795,
+      "grad_norm": 0.6971902449310058,
+      "learning_rate": 0.003,
+      "loss": 4.2172,
+      "step": 1795
+    },
+    {
+      "epoch": 0.01796,
+      "grad_norm": 0.762901849453012,
+      "learning_rate": 0.003,
+      "loss": 4.2347,
+      "step": 1796
+    },
+    {
+      "epoch": 0.01797,
+      "grad_norm": 0.8440627560282756,
+      "learning_rate": 0.003,
+      "loss": 4.2037,
+      "step": 1797
+    },
+    {
+      "epoch": 0.01798,
+      "grad_norm": 0.8368125917294373,
+      "learning_rate": 0.003,
+      "loss": 4.2148,
+      "step": 1798
+    },
+    {
+      "epoch": 0.01799,
+      "grad_norm": 0.7348763481882639,
+      "learning_rate": 0.003,
+      "loss": 4.2247,
+      "step": 1799
+    },
+    {
+      "epoch": 0.018,
+      "grad_norm": 0.7680359832381647,
+      "learning_rate": 0.003,
+      "loss": 4.2184,
+      "step": 1800
+    },
+    {
+      "epoch": 0.01801,
+      "grad_norm": 0.7404783794030995,
+      "learning_rate": 0.003,
+      "loss": 4.2101,
+      "step": 1801
+    },
+    {
+      "epoch": 0.01802,
+      "grad_norm": 0.6580953290368566,
+      "learning_rate": 0.003,
+      "loss": 4.2238,
+      "step": 1802
+    },
+    {
+      "epoch": 0.01803,
+      "grad_norm": 0.6630631878191348,
+      "learning_rate": 0.003,
+      "loss": 4.2284,
+      "step": 1803
+    },
+    {
+      "epoch": 0.01804,
+      "grad_norm": 0.6396012711111247,
+      "learning_rate": 0.003,
+      "loss": 4.1992,
+      "step": 1804
+    },
+    {
+      "epoch": 0.01805,
+      "grad_norm": 0.5559219636409923,
+      "learning_rate": 0.003,
+      "loss": 4.2154,
+      "step": 1805
+    },
+    {
+      "epoch": 0.01806,
+      "grad_norm": 0.6144421410960899,
+      "learning_rate": 0.003,
+      "loss": 4.1763,
+      "step": 1806
+    },
+    {
+      "epoch": 0.01807,
+      "grad_norm": 0.6146777760728932,
+      "learning_rate": 0.003,
+      "loss": 4.2121,
+      "step": 1807
+    },
+    {
+      "epoch": 0.01808,
+      "grad_norm": 0.6116485891270191,
+      "learning_rate": 0.003,
+      "loss": 4.2001,
+      "step": 1808
+    },
+    {
+      "epoch": 0.01809,
+      "grad_norm": 0.8096274979504547,
+      "learning_rate": 0.003,
+      "loss": 4.2275,
+      "step": 1809
+    },
+    {
+      "epoch": 0.0181,
+      "grad_norm": 1.1102312559563035,
+      "learning_rate": 0.003,
+      "loss": 4.2346,
+      "step": 1810
+    },
+    {
+      "epoch": 0.01811,
+      "grad_norm": 0.8754620418282368,
+      "learning_rate": 0.003,
+      "loss": 4.2216,
+      "step": 1811
+    },
+    {
+      "epoch": 0.01812,
+      "grad_norm": 0.6567434234036816,
+      "learning_rate": 0.003,
+      "loss": 4.1947,
+      "step": 1812
+    },
+    {
+      "epoch": 0.01813,
+      "grad_norm": 0.7375490051000831,
+      "learning_rate": 0.003,
+      "loss": 4.2409,
+      "step": 1813
+    },
+    {
+      "epoch": 0.01814,
+      "grad_norm": 0.7842570681491089,
+      "learning_rate": 0.003,
+      "loss": 4.2075,
+      "step": 1814
+    },
+    {
+      "epoch": 0.01815,
+      "grad_norm": 0.6332278380543743,
+      "learning_rate": 0.003,
+      "loss": 4.1891,
+      "step": 1815
+    },
+    {
+      "epoch": 0.01816,
+      "grad_norm": 0.6578340036445237,
+      "learning_rate": 0.003,
+      "loss": 4.2166,
+      "step": 1816
+    },
+    {
+      "epoch": 0.01817,
+      "grad_norm": 0.6624785828428507,
+      "learning_rate": 0.003,
+      "loss": 4.2068,
+      "step": 1817
+    },
+    {
+      "epoch": 0.01818,
+      "grad_norm": 0.5897502755122268,
+      "learning_rate": 0.003,
+      "loss": 4.2034,
+      "step": 1818
+    },
+    {
+      "epoch": 0.01819,
+      "grad_norm": 0.49357607734419184,
+      "learning_rate": 0.003,
+      "loss": 4.1821,
+      "step": 1819
+    },
+    {
+      "epoch": 0.0182,
+      "grad_norm": 0.5025205629657381,
+      "learning_rate": 0.003,
+      "loss": 4.2022,
+      "step": 1820
+    },
+    {
+      "epoch": 0.01821,
+      "grad_norm": 0.5231764315153676,
+      "learning_rate": 0.003,
+      "loss": 4.188,
+      "step": 1821
+    },
+    {
+      "epoch": 0.01822,
+      "grad_norm": 0.589855961825142,
+      "learning_rate": 0.003,
+      "loss": 4.2017,
+      "step": 1822
+    },
+    {
+      "epoch": 0.01823,
+      "grad_norm": 0.6454531707936206,
+      "learning_rate": 0.003,
+      "loss": 4.2159,
+      "step": 1823
+    },
+    {
+      "epoch": 0.01824,
+      "grad_norm": 0.616450651672451,
+      "learning_rate": 0.003,
+      "loss": 4.1987,
+      "step": 1824
+    },
+    {
+      "epoch": 0.01825,
+      "grad_norm": 0.5393299556773667,
+      "learning_rate": 0.003,
+      "loss": 4.199,
+      "step": 1825
+    },
+    {
+      "epoch": 0.01826,
+      "grad_norm": 0.6162633558651508,
+      "learning_rate": 0.003,
+      "loss": 4.2135,
+      "step": 1826
+    },
+    {
+      "epoch": 0.01827,
+      "grad_norm": 0.6366286833338192,
+      "learning_rate": 0.003,
+      "loss": 4.1847,
+      "step": 1827
+    },
+    {
+      "epoch": 0.01828,
+      "grad_norm": 0.5519308619846083,
+      "learning_rate": 0.003,
+      "loss": 4.1935,
+      "step": 1828
+    },
+    {
+      "epoch": 0.01829,
+      "grad_norm": 0.556700588713125,
+      "learning_rate": 0.003,
+      "loss": 4.2083,
+      "step": 1829
+    },
+    {
+      "epoch": 0.0183,
+      "grad_norm": 0.6225243144725614,
+      "learning_rate": 0.003,
+      "loss": 4.2039,
+      "step": 1830
+    },
+    {
+      "epoch": 0.01831,
+      "grad_norm": 0.8092938095153425,
+      "learning_rate": 0.003,
+      "loss": 4.2017,
+      "step": 1831
+    },
+    {
+      "epoch": 0.01832,
+      "grad_norm": 1.156880999962459,
+      "learning_rate": 0.003,
+      "loss": 4.214,
+      "step": 1832
+    },
+    {
+      "epoch": 0.01833,
+      "grad_norm": 0.8498167035958548,
+      "learning_rate": 0.003,
+      "loss": 4.2422,
+      "step": 1833
+    },
+    {
+      "epoch": 0.01834,
+      "grad_norm": 0.6651811196480657,
+      "learning_rate": 0.003,
+      "loss": 4.2008,
+      "step": 1834
+    },
+    {
+      "epoch": 0.01835,
+      "grad_norm": 0.7438128363368247,
+      "learning_rate": 0.003,
+      "loss": 4.205,
+      "step": 1835
+    },
+    {
+      "epoch": 0.01836,
+      "grad_norm": 0.7712205326044778,
+      "learning_rate": 0.003,
+      "loss": 4.2014,
+      "step": 1836
+    },
+    {
+      "epoch": 0.01837,
+      "grad_norm": 0.7094945295905764,
+      "learning_rate": 0.003,
+      "loss": 4.1968,
+      "step": 1837
+    },
+    {
+      "epoch": 0.01838,
+      "grad_norm": 0.8255996162995448,
+      "learning_rate": 0.003,
+      "loss": 4.2049,
+      "step": 1838
+    },
+    {
+      "epoch": 0.01839,
+      "grad_norm": 0.7001056258854736,
+      "learning_rate": 0.003,
+      "loss": 4.2234,
+      "step": 1839
+    },
+    {
+      "epoch": 0.0184,
+      "grad_norm": 0.7156089343491636,
+      "learning_rate": 0.003,
+      "loss": 4.2248,
+      "step": 1840
+    },
+    {
+      "epoch": 0.01841,
+      "grad_norm": 0.7514284093505675,
+      "learning_rate": 0.003,
+      "loss": 4.164,
+      "step": 1841
+    },
+    {
+      "epoch": 0.01842,
+      "grad_norm": 0.7804042052375482,
+      "learning_rate": 0.003,
+      "loss": 4.1995,
+      "step": 1842
+    },
+    {
+      "epoch": 0.01843,
+      "grad_norm": 0.7741073572881331,
+      "learning_rate": 0.003,
+      "loss": 4.1826,
+      "step": 1843
+    },
+    {
+      "epoch": 0.01844,
+      "grad_norm": 0.8011416312871611,
+      "learning_rate": 0.003,
+      "loss": 4.1842,
+      "step": 1844
+    },
+    {
+      "epoch": 0.01845,
+      "grad_norm": 0.8386720056960224,
+      "learning_rate": 0.003,
+      "loss": 4.187,
+      "step": 1845
+    },
+    {
+      "epoch": 0.01846,
+      "grad_norm": 0.9781723483143008,
+      "learning_rate": 0.003,
+      "loss": 4.2242,
+      "step": 1846
+    },
+    {
+      "epoch": 0.01847,
+      "grad_norm": 0.9173016227544075,
+      "learning_rate": 0.003,
+      "loss": 4.2189,
+      "step": 1847
+    },
+    {
+      "epoch": 0.01848,
+      "grad_norm": 0.961352235410328,
+      "learning_rate": 0.003,
+      "loss": 4.2359,
+      "step": 1848
+    },
+    {
+      "epoch": 0.01849,
+      "grad_norm": 0.9115360913743059,
+      "learning_rate": 0.003,
+      "loss": 4.241,
+      "step": 1849
+    },
+    {
+      "epoch": 0.0185,
+      "grad_norm": 0.949227003474929,
+      "learning_rate": 0.003,
+      "loss": 4.2298,
+      "step": 1850
+    },
+    {
+      "epoch": 0.01851,
+      "grad_norm": 0.756851342238193,
+      "learning_rate": 0.003,
+      "loss": 4.2063,
+      "step": 1851
+    },
+    {
+      "epoch": 0.01852,
+      "grad_norm": 0.7062657388831308,
+      "learning_rate": 0.003,
+      "loss": 4.2058,
+      "step": 1852
+    },
+    {
+      "epoch": 0.01853,
+      "grad_norm": 0.804960952863702,
+      "learning_rate": 0.003,
+      "loss": 4.206,
+      "step": 1853
+    },
+    {
+      "epoch": 0.01854,
+      "grad_norm": 0.8048402157100555,
+      "learning_rate": 0.003,
+      "loss": 4.2379,
+      "step": 1854
+    },
+    {
+      "epoch": 0.01855,
+      "grad_norm": 0.8514933979131342,
+      "learning_rate": 0.003,
+      "loss": 4.2236,
+      "step": 1855
+    },
+    {
+      "epoch": 0.01856,
+      "grad_norm": 0.9293561457561126,
+      "learning_rate": 0.003,
+      "loss": 4.2144,
+      "step": 1856
+    },
+    {
+      "epoch": 0.01857,
+      "grad_norm": 0.8452785281639297,
+      "learning_rate": 0.003,
+      "loss": 4.1937,
+      "step": 1857
+    },
+    {
+      "epoch": 0.01858,
+      "grad_norm": 0.805524803230092,
+      "learning_rate": 0.003,
+      "loss": 4.2015,
+      "step": 1858
+    },
+    {
+      "epoch": 0.01859,
+      "grad_norm": 0.826226040676062,
+      "learning_rate": 0.003,
+      "loss": 4.2023,
+      "step": 1859
+    },
+    {
+      "epoch": 0.0186,
+      "grad_norm": 0.7646190876843869,
+      "learning_rate": 0.003,
+      "loss": 4.1808,
+      "step": 1860
+    },
+    {
+      "epoch": 0.01861,
+      "grad_norm": 0.9398444707205434,
+      "learning_rate": 0.003,
+      "loss": 4.229,
+      "step": 1861
+    },
+    {
+      "epoch": 0.01862,
+      "grad_norm": 1.0799822817783482,
+      "learning_rate": 0.003,
+      "loss": 4.208,
+      "step": 1862
+    },
+    {
+      "epoch": 0.01863,
+      "grad_norm": 1.0663393244093409,
+      "learning_rate": 0.003,
+      "loss": 4.2176,
+      "step": 1863
+    },
+    {
+      "epoch": 0.01864,
+      "grad_norm": 0.9063119158507146,
+      "learning_rate": 0.003,
+      "loss": 4.243,
+      "step": 1864
+    },
+    {
+      "epoch": 0.01865,
+      "grad_norm": 0.8878085186485055,
+      "learning_rate": 0.003,
+      "loss": 4.222,
+      "step": 1865
+    },
+    {
+      "epoch": 0.01866,
+      "grad_norm": 0.8278859311900081,
+      "learning_rate": 0.003,
+      "loss": 4.2161,
+      "step": 1866
+    },
+    {
+      "epoch": 0.01867,
+      "grad_norm": 0.9175811468419192,
+      "learning_rate": 0.003,
+      "loss": 4.1983,
+      "step": 1867
+    },
+    {
+      "epoch": 0.01868,
+      "grad_norm": 0.8195895794655929,
+      "learning_rate": 0.003,
+      "loss": 4.2097,
+      "step": 1868
+    },
+    {
+      "epoch": 0.01869,
+      "grad_norm": 0.6958671652018995,
+      "learning_rate": 0.003,
+      "loss": 4.2233,
+      "step": 1869
+    },
+    {
+      "epoch": 0.0187,
+      "grad_norm": 0.7276701584872959,
+      "learning_rate": 0.003,
+      "loss": 4.2132,
+      "step": 1870
+    },
+    {
+      "epoch": 0.01871,
+      "grad_norm": 0.700252662578374,
+      "learning_rate": 0.003,
+      "loss": 4.1886,
+      "step": 1871
+    },
+    {
+      "epoch": 0.01872,
+      "grad_norm": 0.7936164889803413,
+      "learning_rate": 0.003,
+      "loss": 4.2323,
+      "step": 1872
+    },
+    {
+      "epoch": 0.01873,
+      "grad_norm": 0.8026794723116766,
+      "learning_rate": 0.003,
+      "loss": 4.1938,
+      "step": 1873
+    },
+    {
+      "epoch": 0.01874,
+      "grad_norm": 0.6813841695856067,
+      "learning_rate": 0.003,
+      "loss": 4.1939,
+      "step": 1874
+    },
+    {
+      "epoch": 0.01875,
+      "grad_norm": 0.7703759265763755,
+      "learning_rate": 0.003,
+      "loss": 4.2154,
+      "step": 1875
+    },
+    {
+      "epoch": 0.01876,
+      "grad_norm": 0.7048464208203703,
+      "learning_rate": 0.003,
+      "loss": 4.2195,
+      "step": 1876
+    },
+    {
+      "epoch": 0.01877,
+      "grad_norm": 0.6552231659596246,
+      "learning_rate": 0.003,
+      "loss": 4.2204,
+      "step": 1877
+    },
+    {
+      "epoch": 0.01878,
+      "grad_norm": 0.6594564394551935,
+      "learning_rate": 0.003,
+      "loss": 4.2061,
+      "step": 1878
+    },
+    {
+      "epoch": 0.01879,
+      "grad_norm": 0.6775220505077814,
+      "learning_rate": 0.003,
+      "loss": 4.1841,
+      "step": 1879
+    },
+    {
+      "epoch": 0.0188,
+      "grad_norm": 0.7469198030113778,
+      "learning_rate": 0.003,
+      "loss": 4.1948,
+      "step": 1880
+    },
+    {
+      "epoch": 0.01881,
+      "grad_norm": 0.8052292593043413,
+      "learning_rate": 0.003,
+      "loss": 4.1959,
+      "step": 1881
+    },
+    {
+      "epoch": 0.01882,
+      "grad_norm": 0.9310051602523918,
+      "learning_rate": 0.003,
+      "loss": 4.2256,
+      "step": 1882
+    },
+    {
+      "epoch": 0.01883,
+      "grad_norm": 0.8470067207894152,
+      "learning_rate": 0.003,
+      "loss": 4.2057,
+      "step": 1883
+    },
+    {
+      "epoch": 0.01884,
+      "grad_norm": 0.7163623431526733,
+      "learning_rate": 0.003,
+      "loss": 4.1948,
+      "step": 1884
+    },
+    {
+      "epoch": 0.01885,
+      "grad_norm": 0.7917195791610646,
+      "learning_rate": 0.003,
+      "loss": 4.1745,
+      "step": 1885
+    },
+    {
+      "epoch": 0.01886,
+      "grad_norm": 0.7950048534183088,
+      "learning_rate": 0.003,
+      "loss": 4.2171,
+      "step": 1886
+    },
+    {
+      "epoch": 0.01887,
+      "grad_norm": 0.7209518927340428,
+      "learning_rate": 0.003,
+      "loss": 4.2025,
+      "step": 1887
+    },
+    {
+      "epoch": 0.01888,
+      "grad_norm": 0.633107358042472,
+      "learning_rate": 0.003,
+      "loss": 4.1883,
+      "step": 1888
+    },
+    {
+      "epoch": 0.01889,
+      "grad_norm": 0.6143038001546575,
+      "learning_rate": 0.003,
+      "loss": 4.1996,
+      "step": 1889
+    },
+    {
+      "epoch": 0.0189,
+      "grad_norm": 0.5158052453680656,
+      "learning_rate": 0.003,
+      "loss": 4.1757,
+      "step": 1890
+    },
+    {
+      "epoch": 0.01891,
+      "grad_norm": 0.4633760186396196,
+      "learning_rate": 0.003,
+      "loss": 4.2224,
+      "step": 1891
+    },
+    {
+      "epoch": 0.01892,
+      "grad_norm": 0.4927650862046063,
+      "learning_rate": 0.003,
+      "loss": 4.1956,
+      "step": 1892
+    },
+    {
+      "epoch": 0.01893,
+      "grad_norm": 0.4373024700349087,
+      "learning_rate": 0.003,
+      "loss": 4.1773,
+      "step": 1893
+    },
+    {
+      "epoch": 0.01894,
+      "grad_norm": 0.40335105556509215,
+      "learning_rate": 0.003,
+      "loss": 4.1784,
+      "step": 1894
+    },
+    {
+      "epoch": 0.01895,
+      "grad_norm": 0.3931015972388579,
+      "learning_rate": 0.003,
+      "loss": 4.1649,
+      "step": 1895
+    },
+    {
+      "epoch": 0.01896,
+      "grad_norm": 0.369330374322213,
+      "learning_rate": 0.003,
+      "loss": 4.1861,
+      "step": 1896
+    },
+    {
+      "epoch": 0.01897,
+      "grad_norm": 0.38394762475942135,
+      "learning_rate": 0.003,
+      "loss": 4.1841,
+      "step": 1897
+    },
+    {
+      "epoch": 0.01898,
+      "grad_norm": 0.4616420599151427,
+      "learning_rate": 0.003,
+      "loss": 4.1533,
+      "step": 1898
+    },
+    {
+      "epoch": 0.01899,
+      "grad_norm": 0.6786086644128849,
+      "learning_rate": 0.003,
+      "loss": 4.1744,
+      "step": 1899
+    },
+    {
+      "epoch": 0.019,
+      "grad_norm": 1.1028955308001205,
+      "learning_rate": 0.003,
+      "loss": 4.1821,
+      "step": 1900
+    },
+    {
+      "epoch": 0.01901,
+      "grad_norm": 1.0381658886424407,
+      "learning_rate": 0.003,
+      "loss": 4.2416,
+      "step": 1901
+    },
+    {
+      "epoch": 0.01902,
+      "grad_norm": 0.6228046274507568,
+      "learning_rate": 0.003,
+      "loss": 4.202,
+      "step": 1902
+    },
+    {
+      "epoch": 0.01903,
+      "grad_norm": 0.552687373313921,
+      "learning_rate": 0.003,
+      "loss": 4.1547,
+      "step": 1903
+    },
+    {
+      "epoch": 0.01904,
+      "grad_norm": 0.5642583939104693,
+      "learning_rate": 0.003,
+      "loss": 4.2113,
+      "step": 1904
+    },
+    {
+      "epoch": 0.01905,
+      "grad_norm": 0.4866940659015072,
+      "learning_rate": 0.003,
+      "loss": 4.1815,
+      "step": 1905
+    },
+    {
+      "epoch": 0.01906,
+      "grad_norm": 0.6770303387740269,
+      "learning_rate": 0.003,
+      "loss": 4.2038,
+      "step": 1906
+    },
+    {
+      "epoch": 0.01907,
+      "grad_norm": 0.6799220004643979,
+      "learning_rate": 0.003,
+      "loss": 4.199,
+      "step": 1907
+    },
+    {
+      "epoch": 0.01908,
+      "grad_norm": 0.5738027965979297,
+      "learning_rate": 0.003,
+      "loss": 4.2084,
+      "step": 1908
+    },
+    {
+      "epoch": 0.01909,
+      "grad_norm": 0.5646602374989558,
+      "learning_rate": 0.003,
+      "loss": 4.1743,
+      "step": 1909
+    },
+    {
+      "epoch": 0.0191,
+      "grad_norm": 0.6702075278822099,
+      "learning_rate": 0.003,
+      "loss": 4.1866,
+      "step": 1910
+    },
+    {
+      "epoch": 0.01911,
+      "grad_norm": 0.7731308719042311,
+      "learning_rate": 0.003,
+      "loss": 4.1672,
+      "step": 1911
+    },
+    {
+      "epoch": 0.01912,
+      "grad_norm": 0.6258475347121828,
+      "learning_rate": 0.003,
+      "loss": 4.1744,
+      "step": 1912
+    },
+    {
+      "epoch": 0.01913,
+      "grad_norm": 0.6373334035841824,
+      "learning_rate": 0.003,
+      "loss": 4.1923,
+      "step": 1913
+    },
+    {
+      "epoch": 0.01914,
+      "grad_norm": 0.7497697739080498,
+      "learning_rate": 0.003,
+      "loss": 4.212,
+      "step": 1914
+    },
+    {
+      "epoch": 0.01915,
+      "grad_norm": 0.8443649873420552,
+      "learning_rate": 0.003,
+      "loss": 4.204,
+      "step": 1915
+    },
+    {
+      "epoch": 0.01916,
+      "grad_norm": 0.8327185244516057,
+      "learning_rate": 0.003,
+      "loss": 4.1829,
+      "step": 1916
+    },
+    {
+      "epoch": 0.01917,
+      "grad_norm": 0.8831134466171928,
+      "learning_rate": 0.003,
+      "loss": 4.2052,
+      "step": 1917
+    },
+    {
+      "epoch": 0.01918,
+      "grad_norm": 0.9318795049142128,
+      "learning_rate": 0.003,
+      "loss": 4.1944,
+      "step": 1918
+    },
+    {
+      "epoch": 0.01919,
+      "grad_norm": 0.944334593915859,
+      "learning_rate": 0.003,
+      "loss": 4.2049,
+      "step": 1919
+    },
+    {
+      "epoch": 0.0192,
+      "grad_norm": 0.9365986219494583,
+      "learning_rate": 0.003,
+      "loss": 4.2076,
+      "step": 1920
+    },
+    {
+      "epoch": 0.01921,
+      "grad_norm": 0.9617762005888578,
+      "learning_rate": 0.003,
+      "loss": 4.2016,
+      "step": 1921
+    },
+    {
+      "epoch": 0.01922,
+      "grad_norm": 0.9805293516298391,
+      "learning_rate": 0.003,
+      "loss": 4.2181,
+      "step": 1922
+    },
+    {
+      "epoch": 0.01923,
+      "grad_norm": 0.8498471851851511,
+      "learning_rate": 0.003,
+      "loss": 4.2204,
+      "step": 1923
+    },
+    {
+      "epoch": 0.01924,
+      "grad_norm": 0.9257747078341632,
+      "learning_rate": 0.003,
+      "loss": 4.2284,
+      "step": 1924
+    },
+    {
+      "epoch": 0.01925,
+      "grad_norm": 1.0324220402854314,
+      "learning_rate": 0.003,
+      "loss": 4.2182,
+      "step": 1925
+    },
+    {
+      "epoch": 0.01926,
+      "grad_norm": 0.9745230962825202,
+      "learning_rate": 0.003,
+      "loss": 4.2282,
+      "step": 1926
+    },
+    {
+      "epoch": 0.01927,
+      "grad_norm": 0.7705843813355484,
+      "learning_rate": 0.003,
+      "loss": 4.2129,
+      "step": 1927
+    },
+    {
+      "epoch": 0.01928,
+      "grad_norm": 0.5911790602013839,
+      "learning_rate": 0.003,
+      "loss": 4.1789,
+      "step": 1928
+    },
+    {
+      "epoch": 0.01929,
+      "grad_norm": 0.5782217294801848,
+      "learning_rate": 0.003,
+      "loss": 4.1986,
+      "step": 1929
+    },
+    {
+      "epoch": 0.0193,
+      "grad_norm": 0.5556254348196169,
+      "learning_rate": 0.003,
+      "loss": 4.1871,
+      "step": 1930
+    },
+    {
+      "epoch": 0.01931,
+      "grad_norm": 0.6337897693351197,
+      "learning_rate": 0.003,
+      "loss": 4.188,
+      "step": 1931
+    },
+    {
+      "epoch": 0.01932,
+      "grad_norm": 0.6851141809276902,
+      "learning_rate": 0.003,
+      "loss": 4.1982,
+      "step": 1932
+    },
+    {
+      "epoch": 0.01933,
+      "grad_norm": 0.6532471380653952,
+      "learning_rate": 0.003,
+      "loss": 4.1838,
+      "step": 1933
+    },
+    {
+      "epoch": 0.01934,
+      "grad_norm": 0.660311918680962,
+      "learning_rate": 0.003,
+      "loss": 4.1881,
+      "step": 1934
+    },
+    {
+      "epoch": 0.01935,
+      "grad_norm": 0.6202434802996027,
+      "learning_rate": 0.003,
+      "loss": 4.2019,
+      "step": 1935
+    },
+    {
+      "epoch": 0.01936,
+      "grad_norm": 0.6456697693032586,
+      "learning_rate": 0.003,
+      "loss": 4.1829,
+      "step": 1936
+    },
+    {
+      "epoch": 0.01937,
+      "grad_norm": 0.6945407817649866,
+      "learning_rate": 0.003,
+      "loss": 4.1968,
+      "step": 1937
+    },
+    {
+      "epoch": 0.01938,
+      "grad_norm": 0.7393543695473613,
+      "learning_rate": 0.003,
+      "loss": 4.1957,
+      "step": 1938
+    },
+    {
+      "epoch": 0.01939,
+      "grad_norm": 0.7713579649815137,
+      "learning_rate": 0.003,
+      "loss": 4.223,
+      "step": 1939
+    },
+    {
+      "epoch": 0.0194,
+      "grad_norm": 0.6228971544711813,
+      "learning_rate": 0.003,
+      "loss": 4.1928,
+      "step": 1940
+    },
+    {
+      "epoch": 0.01941,
+      "grad_norm": 0.5726639902946573,
+      "learning_rate": 0.003,
+      "loss": 4.1739,
+      "step": 1941
+    },
+    {
+      "epoch": 0.01942,
+      "grad_norm": 0.6064973298236461,
+      "learning_rate": 0.003,
+      "loss": 4.1989,
+      "step": 1942
+    },
+    {
+      "epoch": 0.01943,
+      "grad_norm": 0.6888737571597997,
+      "learning_rate": 0.003,
+      "loss": 4.1711,
+      "step": 1943
+    },
+    {
+      "epoch": 0.01944,
+      "grad_norm": 0.6708431670479557,
+      "learning_rate": 0.003,
+      "loss": 4.2158,
+      "step": 1944
+    },
+    {
+      "epoch": 0.01945,
+      "grad_norm": 0.5681415129494921,
+      "learning_rate": 0.003,
+      "loss": 4.1787,
+      "step": 1945
+    },
+    {
+      "epoch": 0.01946,
+      "grad_norm": 0.47730421654011235,
+      "learning_rate": 0.003,
+      "loss": 4.1716,
+      "step": 1946
+    },
+    {
+      "epoch": 0.01947,
+      "grad_norm": 0.47116667407771645,
+      "learning_rate": 0.003,
+      "loss": 4.1791,
+      "step": 1947
+    },
+    {
+      "epoch": 0.01948,
+      "grad_norm": 0.6002169491076368,
+      "learning_rate": 0.003,
+      "loss": 4.2004,
+      "step": 1948
+    },
+    {
+      "epoch": 0.01949,
+      "grad_norm": 0.6804506442238769,
+      "learning_rate": 0.003,
+      "loss": 4.1961,
+      "step": 1949
+    },
+    {
+      "epoch": 0.0195,
+      "grad_norm": 0.9031150085350885,
+      "learning_rate": 0.003,
+      "loss": 4.1909,
+      "step": 1950
+    },
+    {
+      "epoch": 0.01951,
+      "grad_norm": 1.055108763514552,
+      "learning_rate": 0.003,
+      "loss": 4.1972,
+      "step": 1951
+    },
+    {
+      "epoch": 0.01952,
+      "grad_norm": 0.7942759742680753,
+      "learning_rate": 0.003,
+      "loss": 4.1947,
+      "step": 1952
+    },
+    {
+      "epoch": 0.01953,
+      "grad_norm": 0.6995419703208098,
+      "learning_rate": 0.003,
+      "loss": 4.2007,
+      "step": 1953
+    },
+    {
+      "epoch": 0.01954,
+      "grad_norm": 0.7323475440015588,
+      "learning_rate": 0.003,
+      "loss": 4.2017,
+      "step": 1954
+    },
+    {
+      "epoch": 0.01955,
+      "grad_norm": 0.654602209622962,
+      "learning_rate": 0.003,
+      "loss": 4.1897,
+      "step": 1955
+    },
+    {
+      "epoch": 0.01956,
+      "grad_norm": 0.6795706239523924,
+      "learning_rate": 0.003,
+      "loss": 4.188,
+      "step": 1956
+    },
+    {
+      "epoch": 0.01957,
+      "grad_norm": 0.822104190978127,
+      "learning_rate": 0.003,
+      "loss": 4.1611,
+      "step": 1957
+    },
+    {
+      "epoch": 0.01958,
+      "grad_norm": 0.7829306267633842,
+      "learning_rate": 0.003,
+      "loss": 4.1555,
+      "step": 1958
+    },
+    {
+      "epoch": 0.01959,
+      "grad_norm": 0.7521187388682534,
+      "learning_rate": 0.003,
+      "loss": 4.1801,
+      "step": 1959
+    },
+    {
+      "epoch": 0.0196,
+      "grad_norm": 0.8699203949276841,
+      "learning_rate": 0.003,
+      "loss": 4.1777,
+      "step": 1960
+    },
+    {
+      "epoch": 0.01961,
+      "grad_norm": 0.9397547951451798,
+      "learning_rate": 0.003,
+      "loss": 4.1846,
+      "step": 1961
+    },
+    {
+      "epoch": 0.01962,
+      "grad_norm": 0.8367517905224471,
+      "learning_rate": 0.003,
+      "loss": 4.1848,
+      "step": 1962
+    },
+    {
+      "epoch": 0.01963,
+      "grad_norm": 0.8914011026895058,
+      "learning_rate": 0.003,
+      "loss": 4.1735,
+      "step": 1963
+    },
+    {
+      "epoch": 0.01964,
+      "grad_norm": 1.069474116819015,
+      "learning_rate": 0.003,
+      "loss": 4.2192,
+      "step": 1964
+    },
+    {
+      "epoch": 0.01965,
+      "grad_norm": 0.9846418582261246,
+      "learning_rate": 0.003,
+      "loss": 4.2097,
+      "step": 1965
+    },
+    {
+      "epoch": 0.01966,
+      "grad_norm": 1.190189959584559,
+      "learning_rate": 0.003,
+      "loss": 4.2246,
+      "step": 1966
+    },
+    {
+      "epoch": 0.01967,
+      "grad_norm": 0.9331071177303962,
+      "learning_rate": 0.003,
+      "loss": 4.1941,
+      "step": 1967
+    },
+    {
+      "epoch": 0.01968,
+      "grad_norm": 0.8236928876485803,
+      "learning_rate": 0.003,
+      "loss": 4.2092,
+      "step": 1968
+    },
+    {
+      "epoch": 0.01969,
+      "grad_norm": 0.9035337012628801,
+      "learning_rate": 0.003,
+      "loss": 4.2128,
+      "step": 1969
+    },
+    {
+      "epoch": 0.0197,
+      "grad_norm": 0.9584839773684666,
+      "learning_rate": 0.003,
+      "loss": 4.2139,
+      "step": 1970
+    },
+    {
+      "epoch": 0.01971,
+      "grad_norm": 1.1976612578146555,
+      "learning_rate": 0.003,
+      "loss": 4.2078,
+      "step": 1971
+    },
+    {
+      "epoch": 0.01972,
+      "grad_norm": 1.03766882598544,
+      "learning_rate": 0.003,
+      "loss": 4.216,
+      "step": 1972
+    },
+    {
+      "epoch": 0.01973,
+      "grad_norm": 1.157490820023187,
+      "learning_rate": 0.003,
+      "loss": 4.204,
+      "step": 1973
+    },
+    {
+      "epoch": 0.01974,
+      "grad_norm": 1.0168705909824025,
+      "learning_rate": 0.003,
+      "loss": 4.2365,
+      "step": 1974
+    },
+    {
+      "epoch": 0.01975,
+      "grad_norm": 0.9807823986551194,
+      "learning_rate": 0.003,
+      "loss": 4.2359,
+      "step": 1975
+    },
+    {
+      "epoch": 0.01976,
+      "grad_norm": 0.7975927738530828,
+      "learning_rate": 0.003,
+      "loss": 4.2115,
+      "step": 1976
+    },
+    {
+      "epoch": 0.01977,
+      "grad_norm": 0.9111601004721311,
+      "learning_rate": 0.003,
+      "loss": 4.2503,
+      "step": 1977
+    },
+    {
+      "epoch": 0.01978,
+      "grad_norm": 0.95585271722792,
+      "learning_rate": 0.003,
+      "loss": 4.2456,
+      "step": 1978
+    },
+    {
+      "epoch": 0.01979,
+      "grad_norm": 0.8967358768483624,
+      "learning_rate": 0.003,
+      "loss": 4.2367,
+      "step": 1979
+    },
+    {
+      "epoch": 0.0198,
+      "grad_norm": 0.9098539062714608,
+      "learning_rate": 0.003,
+      "loss": 4.2135,
+      "step": 1980
+    },
+    {
+      "epoch": 0.01981,
+      "grad_norm": 0.9619207371647327,
+      "learning_rate": 0.003,
+      "loss": 4.2036,
+      "step": 1981
+    },
+    {
+      "epoch": 0.01982,
+      "grad_norm": 1.004618203421798,
+      "learning_rate": 0.003,
+      "loss": 4.2311,
+      "step": 1982
+    },
+    {
+      "epoch": 0.01983,
+      "grad_norm": 0.8778331022367669,
+      "learning_rate": 0.003,
+      "loss": 4.2203,
+      "step": 1983
+    },
+    {
+      "epoch": 0.01984,
+      "grad_norm": 0.7409261134848839,
+      "learning_rate": 0.003,
+      "loss": 4.2234,
+      "step": 1984
+    },
+    {
+      "epoch": 0.01985,
+      "grad_norm": 0.7987127824711159,
+      "learning_rate": 0.003,
+      "loss": 4.2126,
+      "step": 1985
+    },
+    {
+      "epoch": 0.01986,
+      "grad_norm": 0.8488590004969258,
+      "learning_rate": 0.003,
+      "loss": 4.2379,
+      "step": 1986
+    },
+    {
+      "epoch": 0.01987,
+      "grad_norm": 0.856993962490694,
+      "learning_rate": 0.003,
+      "loss": 4.1987,
+      "step": 1987
+    },
+    {
+      "epoch": 0.01988,
+      "grad_norm": 0.6814479531298734,
+      "learning_rate": 0.003,
+      "loss": 4.2021,
+      "step": 1988
+    },
+    {
+      "epoch": 0.01989,
+      "grad_norm": 0.5447842315255877,
+      "learning_rate": 0.003,
+      "loss": 4.1871,
+      "step": 1989
+    },
+    {
+      "epoch": 0.0199,
+      "grad_norm": 0.5760022915320738,
+      "learning_rate": 0.003,
+      "loss": 4.1964,
+      "step": 1990
+    },
+    {
+      "epoch": 0.01991,
+      "grad_norm": 0.5132919071482156,
+      "learning_rate": 0.003,
+      "loss": 4.1738,
+      "step": 1991
+    },
+    {
+      "epoch": 0.01992,
+      "grad_norm": 0.5851245859941666,
+      "learning_rate": 0.003,
+      "loss": 4.2128,
+      "step": 1992
+    },
+    {
+      "epoch": 0.01993,
+      "grad_norm": 0.6267354593691943,
+      "learning_rate": 0.003,
+      "loss": 4.1956,
+      "step": 1993
+    },
+    {
+      "epoch": 0.01994,
+      "grad_norm": 0.7099792076486661,
+      "learning_rate": 0.003,
+      "loss": 4.2188,
+      "step": 1994
+    },
+    {
+      "epoch": 0.01995,
+      "grad_norm": 0.7678935327418717,
+      "learning_rate": 0.003,
+      "loss": 4.2111,
+      "step": 1995
+    },
+    {
+      "epoch": 0.01996,
+      "grad_norm": 0.6394486685805509,
+      "learning_rate": 0.003,
+      "loss": 4.1755,
+      "step": 1996
+    },
+    {
+      "epoch": 0.01997,
+      "grad_norm": 0.4226153497592854,
+      "learning_rate": 0.003,
+      "loss": 4.1972,
+      "step": 1997
+    },
+    {
+      "epoch": 0.01998,
+      "grad_norm": 0.5031225205891672,
+      "learning_rate": 0.003,
+      "loss": 4.1844,
+      "step": 1998
+    },
+    {
+      "epoch": 0.01999,
+      "grad_norm": 0.5395430302746123,
+      "learning_rate": 0.003,
+      "loss": 4.1747,
+      "step": 1999
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 0.46234156015204186,
+      "learning_rate": 0.003,
+      "loss": 4.1586,
+      "step": 2000
+    },
+    {
+      "epoch": 0.02001,
+      "grad_norm": 0.4395345274961725,
+      "learning_rate": 0.003,
+      "loss": 4.1934,
+      "step": 2001
+    },
+    {
+      "epoch": 0.02002,
+      "grad_norm": 0.3957661324841875,
+      "learning_rate": 0.003,
+      "loss": 4.1863,
+      "step": 2002
+    },
+    {
+      "epoch": 0.02003,
+      "grad_norm": 0.4711152480437331,
+      "learning_rate": 0.003,
+      "loss": 4.1873,
+      "step": 2003
+    },
+    {
+      "epoch": 0.02004,
+      "grad_norm": 0.546136823152871,
+      "learning_rate": 0.003,
+      "loss": 4.1727,
+      "step": 2004
+    },
+    {
+      "epoch": 0.02005,
+      "grad_norm": 0.654074266015456,
+      "learning_rate": 0.003,
+      "loss": 4.1803,
+      "step": 2005
+    },
+    {
+      "epoch": 0.02006,
+      "grad_norm": 0.710370558494441,
+      "learning_rate": 0.003,
+      "loss": 4.1784,
+      "step": 2006
+    },
+    {
+      "epoch": 0.02007,
+      "grad_norm": 0.7542070025547006,
+      "learning_rate": 0.003,
+      "loss": 4.1945,
+      "step": 2007
+    },
+    {
+      "epoch": 0.02008,
+      "grad_norm": 0.7773827468349818,
+      "learning_rate": 0.003,
+      "loss": 4.191,
+      "step": 2008
+    },
+    {
+      "epoch": 0.02009,
+      "grad_norm": 0.6899352222366664,
+      "learning_rate": 0.003,
+      "loss": 4.1857,
+      "step": 2009
+    },
+    {
+      "epoch": 0.0201,
+      "grad_norm": 0.5614651061397582,
+      "learning_rate": 0.003,
+      "loss": 4.1935,
+      "step": 2010
+    },
+    {
+      "epoch": 0.02011,
+      "grad_norm": 0.6625002839338998,
+      "learning_rate": 0.003,
+      "loss": 4.183,
+      "step": 2011
+    },
+    {
+      "epoch": 0.02012,
+      "grad_norm": 0.6997403670690155,
+      "learning_rate": 0.003,
+      "loss": 4.1822,
+      "step": 2012
+    },
+    {
+      "epoch": 0.02013,
+      "grad_norm": 0.6697515151480535,
+      "learning_rate": 0.003,
+      "loss": 4.1896,
+      "step": 2013
+    },
+    {
+      "epoch": 0.02014,
+      "grad_norm": 0.6386903403073065,
+      "learning_rate": 0.003,
+      "loss": 4.1729,
+      "step": 2014
+    },
+    {
+      "epoch": 0.02015,
+      "grad_norm": 0.5907166723123874,
+      "learning_rate": 0.003,
+      "loss": 4.1657,
+      "step": 2015
+    },
+    {
+      "epoch": 0.02016,
+      "grad_norm": 0.605106268984101,
+      "learning_rate": 0.003,
+      "loss": 4.1769,
+      "step": 2016
+    },
+    {
+      "epoch": 0.02017,
+      "grad_norm": 0.6631806062284518,
+      "learning_rate": 0.003,
+      "loss": 4.1711,
+      "step": 2017
+    },
+    {
+      "epoch": 0.02018,
+      "grad_norm": 0.6347297750042842,
+      "learning_rate": 0.003,
+      "loss": 4.1746,
+      "step": 2018
+    },
+    {
+      "epoch": 0.02019,
+      "grad_norm": 0.8248022848153107,
+      "learning_rate": 0.003,
+      "loss": 4.1834,
+      "step": 2019
+    },
+    {
+      "epoch": 0.0202,
+      "grad_norm": 0.9904970732072179,
+      "learning_rate": 0.003,
+      "loss": 4.2031,
+      "step": 2020
+    },
+    {
+      "epoch": 0.02021,
+      "grad_norm": 0.9086090012812373,
+      "learning_rate": 0.003,
+      "loss": 4.2148,
+      "step": 2021
+    },
+    {
+      "epoch": 0.02022,
+      "grad_norm": 0.9232445860299345,
+      "learning_rate": 0.003,
+      "loss": 4.175,
+      "step": 2022
+    },
+    {
+      "epoch": 0.02023,
+      "grad_norm": 0.8050131783240017,
+      "learning_rate": 0.003,
+      "loss": 4.1493,
+      "step": 2023
+    },
+    {
+      "epoch": 0.02024,
+      "grad_norm": 0.7442373542055678,
+      "learning_rate": 0.003,
+      "loss": 4.1717,
+      "step": 2024
+    },
+    {
+      "epoch": 0.02025,
+      "grad_norm": 0.9832310341451624,
+      "learning_rate": 0.003,
+      "loss": 4.2324,
+      "step": 2025
+    },
+    {
+      "epoch": 0.02026,
+      "grad_norm": 0.9556185585548506,
+      "learning_rate": 0.003,
+      "loss": 4.2147,
+      "step": 2026
+    },
+    {
+      "epoch": 0.02027,
+      "grad_norm": 1.0633653823279028,
+      "learning_rate": 0.003,
+      "loss": 4.218,
+      "step": 2027
+    },
+    {
+      "epoch": 0.02028,
+      "grad_norm": 0.8361708667618777,
+      "learning_rate": 0.003,
+      "loss": 4.1941,
+      "step": 2028
+    },
+    {
+      "epoch": 0.02029,
+      "grad_norm": 0.8621114948206428,
+      "learning_rate": 0.003,
+      "loss": 4.1995,
+      "step": 2029
+    },
+    {
+      "epoch": 0.0203,
+      "grad_norm": 0.7505639644342775,
+      "learning_rate": 0.003,
+      "loss": 4.1873,
+      "step": 2030
+    },
+    {
+      "epoch": 0.02031,
+      "grad_norm": 0.7688539298918926,
+      "learning_rate": 0.003,
+      "loss": 4.1919,
+      "step": 2031
+    },
+    {
+      "epoch": 0.02032,
+      "grad_norm": 0.8008832349814213,
+      "learning_rate": 0.003,
+      "loss": 4.1682,
+      "step": 2032
+    },
+    {
+      "epoch": 0.02033,
+      "grad_norm": 0.7744220160391694,
+      "learning_rate": 0.003,
+      "loss": 4.2069,
+      "step": 2033
+    },
+    {
+      "epoch": 0.02034,
+      "grad_norm": 0.9334621419858093,
+      "learning_rate": 0.003,
+      "loss": 4.2063,
+      "step": 2034
+    },
+    {
+      "epoch": 0.02035,
+      "grad_norm": 1.0003714475706196,
+      "learning_rate": 0.003,
+      "loss": 4.1981,
+      "step": 2035
+    },
+    {
+      "epoch": 0.02036,
+      "grad_norm": 1.0165755074456069,
+      "learning_rate": 0.003,
+      "loss": 4.2039,
+      "step": 2036
+    },
+    {
+      "epoch": 0.02037,
+      "grad_norm": 0.8752656941969562,
+      "learning_rate": 0.003,
+      "loss": 4.2133,
+      "step": 2037
+    },
+    {
+      "epoch": 0.02038,
+      "grad_norm": 0.8030828089902575,
+      "learning_rate": 0.003,
+      "loss": 4.1951,
+      "step": 2038
+    },
+    {
+      "epoch": 0.02039,
+      "grad_norm": 0.6849651287466818,
+      "learning_rate": 0.003,
+      "loss": 4.1984,
+      "step": 2039
+    },
+    {
+      "epoch": 0.0204,
+      "grad_norm": 0.7048992687138306,
+      "learning_rate": 0.003,
+      "loss": 4.2015,
+      "step": 2040
+    },
+    {
+      "epoch": 0.02041,
+      "grad_norm": 0.5846622816322785,
+      "learning_rate": 0.003,
+      "loss": 4.1601,
+      "step": 2041
+    },
+    {
+      "epoch": 0.02042,
+      "grad_norm": 0.579059647971908,
+      "learning_rate": 0.003,
+      "loss": 4.159,
+      "step": 2042
+    },
+    {
+      "epoch": 0.02043,
+      "grad_norm": 0.5320216814522557,
+      "learning_rate": 0.003,
+      "loss": 4.2018,
+      "step": 2043
+    },
+    {
+      "epoch": 0.02044,
+      "grad_norm": 0.5749364817997652,
+      "learning_rate": 0.003,
+      "loss": 4.174,
+      "step": 2044
+    },
+    {
+      "epoch": 0.02045,
+      "grad_norm": 0.5872195224989512,
+      "learning_rate": 0.003,
+      "loss": 4.1881,
+      "step": 2045
+    },
+    {
+      "epoch": 0.02046,
+      "grad_norm": 0.6166345493647961,
+      "learning_rate": 0.003,
+      "loss": 4.1634,
+      "step": 2046
+    },
+    {
+      "epoch": 0.02047,
+      "grad_norm": 0.7223919170905914,
+      "learning_rate": 0.003,
+      "loss": 4.2037,
+      "step": 2047
+    },
+    {
+      "epoch": 0.02048,
+      "grad_norm": 0.7799560828695167,
+      "learning_rate": 0.003,
+      "loss": 4.1877,
+      "step": 2048
+    },
+    {
+      "epoch": 0.02049,
+      "grad_norm": 0.8702209657581298,
+      "learning_rate": 0.003,
+      "loss": 4.2131,
+      "step": 2049
+    },
+    {
+      "epoch": 0.0205,
+      "grad_norm": 0.9375771238974213,
+      "learning_rate": 0.003,
+      "loss": 4.1631,
+      "step": 2050
+    },
+    {
+      "epoch": 0.02051,
+      "grad_norm": 1.1873073578231947,
+      "learning_rate": 0.003,
+      "loss": 4.178,
+      "step": 2051
+    },
+    {
+      "epoch": 0.02052,
+      "grad_norm": 1.03209456773,
+      "learning_rate": 0.003,
+      "loss": 4.1777,
+      "step": 2052
+    },
+    {
+      "epoch": 0.02053,
+      "grad_norm": 1.0555848829046368,
+      "learning_rate": 0.003,
+      "loss": 4.1832,
+      "step": 2053
+    },
+    {
+      "epoch": 0.02054,
+      "grad_norm": 0.9050417954028227,
+      "learning_rate": 0.003,
+      "loss": 4.2168,
+      "step": 2054
+    },
+    {
+      "epoch": 0.02055,
+      "grad_norm": 0.8496945287219897,
+      "learning_rate": 0.003,
+      "loss": 4.1765,
+      "step": 2055
+    },
+    {
+      "epoch": 0.02056,
+      "grad_norm": 0.7540734931162184,
+      "learning_rate": 0.003,
+      "loss": 4.17,
+      "step": 2056
+    },
+    {
+      "epoch": 0.02057,
+      "grad_norm": 0.79729190390271,
+      "learning_rate": 0.003,
+      "loss": 4.2076,
+      "step": 2057
+    },
+    {
+      "epoch": 0.02058,
+      "grad_norm": 0.8092766990194165,
+      "learning_rate": 0.003,
+      "loss": 4.1879,
+      "step": 2058
+    },
+    {
+      "epoch": 0.02059,
+      "grad_norm": 0.787520651985971,
+      "learning_rate": 0.003,
+      "loss": 4.1662,
+      "step": 2059
+    },
+    {
+      "epoch": 0.0206,
+      "grad_norm": 0.8179849805821454,
+      "learning_rate": 0.003,
+      "loss": 4.2123,
+      "step": 2060
+    },
+    {
+      "epoch": 0.02061,
+      "grad_norm": 0.765277725532741,
+      "learning_rate": 0.003,
+      "loss": 4.1867,
+      "step": 2061
+    },
+    {
+      "epoch": 0.02062,
+      "grad_norm": 0.6833167046153735,
+      "learning_rate": 0.003,
+      "loss": 4.1822,
+      "step": 2062
+    },
+    {
+      "epoch": 0.02063,
+      "grad_norm": 0.7852297427280791,
+      "learning_rate": 0.003,
+      "loss": 4.1788,
+      "step": 2063
+    },
+    {
+      "epoch": 0.02064,
+      "grad_norm": 0.7558602646875056,
+      "learning_rate": 0.003,
+      "loss": 4.1894,
+      "step": 2064
+    },
+    {
+      "epoch": 0.02065,
+      "grad_norm": 0.7461513201926369,
+      "learning_rate": 0.003,
+      "loss": 4.1686,
+      "step": 2065
+    },
+    {
+      "epoch": 0.02066,
+      "grad_norm": 0.8279588238364882,
+      "learning_rate": 0.003,
+      "loss": 4.1898,
+      "step": 2066
+    },
+    {
+      "epoch": 0.02067,
+      "grad_norm": 0.7670898564018478,
+      "learning_rate": 0.003,
+      "loss": 4.1869,
+      "step": 2067
+    },
+    {
+      "epoch": 0.02068,
+      "grad_norm": 0.6723403011847539,
+      "learning_rate": 0.003,
+      "loss": 4.1662,
+      "step": 2068
+    },
+    {
+      "epoch": 0.02069,
+      "grad_norm": 0.6336511314262159,
+      "learning_rate": 0.003,
+      "loss": 4.1849,
+      "step": 2069
+    },
+    {
+      "epoch": 0.0207,
+      "grad_norm": 0.70657998006886,
+      "learning_rate": 0.003,
+      "loss": 4.1607,
+      "step": 2070
+    },
+    {
+      "epoch": 0.02071,
+      "grad_norm": 0.7487170820279913,
+      "learning_rate": 0.003,
+      "loss": 4.1912,
+      "step": 2071
+    },
+    {
+      "epoch": 0.02072,
+      "grad_norm": 0.7172563865038417,
+      "learning_rate": 0.003,
+      "loss": 4.1982,
+      "step": 2072
+    },
+    {
+      "epoch": 0.02073,
+      "grad_norm": 0.7815042091167561,
+      "learning_rate": 0.003,
+      "loss": 4.1796,
+      "step": 2073
+    },
+    {
+      "epoch": 0.02074,
+      "grad_norm": 0.7869404752550972,
+      "learning_rate": 0.003,
+      "loss": 4.1864,
+      "step": 2074
+    },
+    {
+      "epoch": 0.02075,
+      "grad_norm": 0.7115380813621578,
+      "learning_rate": 0.003,
+      "loss": 4.2091,
+      "step": 2075
+    },
+    {
+      "epoch": 0.02076,
+      "grad_norm": 0.6271616020332964,
+      "learning_rate": 0.003,
+      "loss": 4.199,
+      "step": 2076
+    },
+    {
+      "epoch": 0.02077,
+      "grad_norm": 0.5945516898335454,
+      "learning_rate": 0.003,
+      "loss": 4.1669,
+      "step": 2077
+    },
+    {
+      "epoch": 0.02078,
+      "grad_norm": 0.5807187113226505,
+      "learning_rate": 0.003,
+      "loss": 4.1615,
+      "step": 2078
+    },
+    {
+      "epoch": 0.02079,
+      "grad_norm": 0.4917157616397027,
+      "learning_rate": 0.003,
+      "loss": 4.1449,
+      "step": 2079
+    },
+    {
+      "epoch": 0.0208,
+      "grad_norm": 0.5388783899386764,
+      "learning_rate": 0.003,
+      "loss": 4.1563,
+      "step": 2080
+    },
+    {
+      "epoch": 0.02081,
+      "grad_norm": 0.6250429891561764,
+      "learning_rate": 0.003,
+      "loss": 4.1899,
+      "step": 2081
+    },
+    {
+      "epoch": 0.02082,
+      "grad_norm": 0.7539524872161132,
+      "learning_rate": 0.003,
+      "loss": 4.1879,
+      "step": 2082
+    },
+    {
+      "epoch": 0.02083,
+      "grad_norm": 0.9021795929932535,
+      "learning_rate": 0.003,
+      "loss": 4.189,
+      "step": 2083
+    },
+    {
+      "epoch": 0.02084,
+      "grad_norm": 0.9892236370325144,
+      "learning_rate": 0.003,
+      "loss": 4.1914,
+      "step": 2084
+    },
+    {
+      "epoch": 0.02085,
+      "grad_norm": 0.9279291435694554,
+      "learning_rate": 0.003,
+      "loss": 4.1653,
+      "step": 2085
+    },
+    {
+      "epoch": 0.02086,
+      "grad_norm": 0.7919688525399468,
+      "learning_rate": 0.003,
+      "loss": 4.1726,
+      "step": 2086
+    },
+    {
+      "epoch": 0.02087,
+      "grad_norm": 0.7965117787009012,
+      "learning_rate": 0.003,
+      "loss": 4.1719,
+      "step": 2087
+    },
+    {
+      "epoch": 0.02088,
+      "grad_norm": 0.882456029692519,
+      "learning_rate": 0.003,
+      "loss": 4.2097,
+      "step": 2088
+    },
+    {
+      "epoch": 0.02089,
+      "grad_norm": 0.8843676371103549,
+      "learning_rate": 0.003,
+      "loss": 4.1781,
+      "step": 2089
+    },
+    {
+      "epoch": 0.0209,
+      "grad_norm": 0.9095817089780983,
+      "learning_rate": 0.003,
+      "loss": 4.2024,
+      "step": 2090
+    },
+    {
+      "epoch": 0.02091,
+      "grad_norm": 0.8360707980502976,
+      "learning_rate": 0.003,
+      "loss": 4.1884,
+      "step": 2091
+    },
+    {
+      "epoch": 0.02092,
+      "grad_norm": 0.8954851906550106,
+      "learning_rate": 0.003,
+      "loss": 4.189,
+      "step": 2092
+    },
+    {
+      "epoch": 0.02093,
+      "grad_norm": 0.8146056324857043,
+      "learning_rate": 0.003,
+      "loss": 4.1544,
+      "step": 2093
+    },
+    {
+      "epoch": 0.02094,
+      "grad_norm": 0.7558490596404085,
+      "learning_rate": 0.003,
+      "loss": 4.1629,
+      "step": 2094
+    },
+    {
+      "epoch": 0.02095,
+      "grad_norm": 0.7384447141579936,
+      "learning_rate": 0.003,
+      "loss": 4.2013,
+      "step": 2095
+    },
+    {
+      "epoch": 0.02096,
+      "grad_norm": 0.7339651428587286,
+      "learning_rate": 0.003,
+      "loss": 4.1917,
+      "step": 2096
+    },
+    {
+      "epoch": 0.02097,
+      "grad_norm": 0.6671356121778469,
+      "learning_rate": 0.003,
+      "loss": 4.1698,
+      "step": 2097
+    },
+    {
+      "epoch": 0.02098,
+      "grad_norm": 0.653522008057512,
+      "learning_rate": 0.003,
+      "loss": 4.1857,
+      "step": 2098
+    },
+    {
+      "epoch": 0.02099,
+      "grad_norm": 0.6717296847888601,
+      "learning_rate": 0.003,
+      "loss": 4.1945,
+      "step": 2099
+    },
+    {
+      "epoch": 0.021,
+      "grad_norm": 0.6641683049026381,
+      "learning_rate": 0.003,
+      "loss": 4.206,
+      "step": 2100
+    },
+    {
+      "epoch": 0.02101,
+      "grad_norm": 0.8263568144502693,
+      "learning_rate": 0.003,
+      "loss": 4.2014,
+      "step": 2101
+    },
+    {
+      "epoch": 0.02102,
+      "grad_norm": 0.9182856357229072,
+      "learning_rate": 0.003,
+      "loss": 4.2116,
+      "step": 2102
+    },
+    {
+      "epoch": 0.02103,
+      "grad_norm": 0.8775651409452159,
+      "learning_rate": 0.003,
+      "loss": 4.1702,
+      "step": 2103
+    },
+    {
+      "epoch": 0.02104,
+      "grad_norm": 0.717248452996101,
+      "learning_rate": 0.003,
+      "loss": 4.1698,
+      "step": 2104
+    },
+    {
+      "epoch": 0.02105,
+      "grad_norm": 0.6147629172749789,
+      "learning_rate": 0.003,
+      "loss": 4.1709,
+      "step": 2105
+    },
+    {
+      "epoch": 0.02106,
+      "grad_norm": 0.5350819274969562,
+      "learning_rate": 0.003,
+      "loss": 4.1599,
+      "step": 2106
+    },
+    {
+      "epoch": 0.02107,
+      "grad_norm": 0.6022860581700974,
+      "learning_rate": 0.003,
+      "loss": 4.1732,
+      "step": 2107
+    },
+    {
+      "epoch": 0.02108,
+      "grad_norm": 0.6033513423948526,
+      "learning_rate": 0.003,
+      "loss": 4.1543,
+      "step": 2108
+    },
+    {
+      "epoch": 0.02109,
+      "grad_norm": 0.6042225440605065,
+      "learning_rate": 0.003,
+      "loss": 4.144,
+      "step": 2109
+    },
+    {
+      "epoch": 0.0211,
+      "grad_norm": 0.560183095906506,
+      "learning_rate": 0.003,
+      "loss": 4.1584,
+      "step": 2110
+    },
+    {
+      "epoch": 0.02111,
+      "grad_norm": 0.5249161566388668,
+      "learning_rate": 0.003,
+      "loss": 4.1663,
+      "step": 2111
+    },
+    {
+      "epoch": 0.02112,
+      "grad_norm": 0.5391312057228763,
+      "learning_rate": 0.003,
+      "loss": 4.1641,
+      "step": 2112
+    },
+    {
+      "epoch": 0.02113,
+      "grad_norm": 0.5260626981895078,
+      "learning_rate": 0.003,
+      "loss": 4.1597,
+      "step": 2113
+    },
+    {
+      "epoch": 0.02114,
+      "grad_norm": 0.5072590511188936,
+      "learning_rate": 0.003,
+      "loss": 4.1483,
+      "step": 2114
+    },
+    {
+      "epoch": 0.02115,
+      "grad_norm": 0.5460141991570172,
+      "learning_rate": 0.003,
+      "loss": 4.1417,
+      "step": 2115
+    },
+    {
+      "epoch": 0.02116,
+      "grad_norm": 0.5749891422597724,
+      "learning_rate": 0.003,
+      "loss": 4.1705,
+      "step": 2116
+    },
+    {
+      "epoch": 0.02117,
+      "grad_norm": 0.6781459947006083,
+      "learning_rate": 0.003,
+      "loss": 4.1668,
+      "step": 2117
+    },
+    {
+      "epoch": 0.02118,
+      "grad_norm": 0.797658364148174,
+      "learning_rate": 0.003,
+      "loss": 4.1692,
+      "step": 2118
+    },
+    {
+      "epoch": 0.02119,
+      "grad_norm": 0.8688435880924688,
+      "learning_rate": 0.003,
+      "loss": 4.1833,
+      "step": 2119
+    },
+    {
+      "epoch": 0.0212,
+      "grad_norm": 0.7833741674522176,
+      "learning_rate": 0.003,
+      "loss": 4.1545,
+      "step": 2120
+    },
+    {
+      "epoch": 0.02121,
+      "grad_norm": 0.791532521338522,
+      "learning_rate": 0.003,
+      "loss": 4.1508,
+      "step": 2121
+    },
+    {
+      "epoch": 0.02122,
+      "grad_norm": 0.7913660839492322,
+      "learning_rate": 0.003,
+      "loss": 4.1614,
+      "step": 2122
+    },
+    {
+      "epoch": 0.02123,
+      "grad_norm": 0.7506478823984966,
+      "learning_rate": 0.003,
+      "loss": 4.1402,
+      "step": 2123
+    },
+    {
+      "epoch": 0.02124,
+      "grad_norm": 0.7580020078198468,
+      "learning_rate": 0.003,
+      "loss": 4.1643,
+      "step": 2124
+    },
+    {
+      "epoch": 0.02125,
+      "grad_norm": 0.7006901754929752,
+      "learning_rate": 0.003,
+      "loss": 4.1792,
+      "step": 2125
+    },
+    {
+      "epoch": 0.02126,
+      "grad_norm": 0.7653616945644249,
+      "learning_rate": 0.003,
+      "loss": 4.1781,
+      "step": 2126
+    },
+    {
+      "epoch": 0.02127,
+      "grad_norm": 0.7492928706532664,
+      "learning_rate": 0.003,
+      "loss": 4.1859,
+      "step": 2127
+    },
+    {
+      "epoch": 0.02128,
+      "grad_norm": 0.7795854700268753,
+      "learning_rate": 0.003,
+      "loss": 4.1758,
+      "step": 2128
+    },
+    {
+      "epoch": 0.02129,
+      "grad_norm": 0.9805419044523191,
+      "learning_rate": 0.003,
+      "loss": 4.186,
+      "step": 2129
+    },
+    {
+      "epoch": 0.0213,
+      "grad_norm": 1.0343575534498912,
+      "learning_rate": 0.003,
+      "loss": 4.1641,
+      "step": 2130
+    },
+    {
+      "epoch": 0.02131,
+      "grad_norm": 0.9468793830055084,
+      "learning_rate": 0.003,
+      "loss": 4.1869,
+      "step": 2131
+    },
+    {
+      "epoch": 0.02132,
+      "grad_norm": 0.9021420581140779,
+      "learning_rate": 0.003,
+      "loss": 4.2032,
+      "step": 2132
+    },
+    {
+      "epoch": 0.02133,
+      "grad_norm": 0.8122842455378513,
+      "learning_rate": 0.003,
+      "loss": 4.1908,
+      "step": 2133
+    },
+    {
+      "epoch": 0.02134,
+      "grad_norm": 1.06138323809129,
+      "learning_rate": 0.003,
+      "loss": 4.2048,
+      "step": 2134
+    },
+    {
+      "epoch": 0.02135,
+      "grad_norm": 1.2270031550495697,
+      "learning_rate": 0.003,
+      "loss": 4.212,
+      "step": 2135
+    },
+    {
+      "epoch": 0.02136,
+      "grad_norm": 0.824636655199696,
+      "learning_rate": 0.003,
+      "loss": 4.1714,
+      "step": 2136
+    },
+    {
+      "epoch": 0.02137,
+      "grad_norm": 0.7977636532107003,
+      "learning_rate": 0.003,
+      "loss": 4.2074,
+      "step": 2137
+    },
+    {
+      "epoch": 0.02138,
+      "grad_norm": 0.8175212273276642,
+      "learning_rate": 0.003,
+      "loss": 4.2175,
+      "step": 2138
+    },
+    {
+      "epoch": 0.02139,
+      "grad_norm": 0.8041667304441814,
+      "learning_rate": 0.003,
+      "loss": 4.2081,
+      "step": 2139
+    },
+    {
+      "epoch": 0.0214,
+      "grad_norm": 0.9410719295719912,
+      "learning_rate": 0.003,
+      "loss": 4.2078,
+      "step": 2140
+    },
+    {
+      "epoch": 0.02141,
+      "grad_norm": 0.9128066534114356,
+      "learning_rate": 0.003,
+      "loss": 4.1891,
+      "step": 2141
+    },
+    {
+      "epoch": 0.02142,
+      "grad_norm": 0.7166876786866843,
+      "learning_rate": 0.003,
+      "loss": 4.1743,
+      "step": 2142
+    },
+    {
+      "epoch": 0.02143,
+      "grad_norm": 0.7553648404993994,
+      "learning_rate": 0.003,
+      "loss": 4.1879,
+      "step": 2143
+    },
+    {
+      "epoch": 0.02144,
+      "grad_norm": 0.9693569859499341,
+      "learning_rate": 0.003,
+      "loss": 4.1979,
+      "step": 2144
+    },
+    {
+      "epoch": 0.02145,
+      "grad_norm": 1.005405043081365,
+      "learning_rate": 0.003,
+      "loss": 4.1853,
+      "step": 2145
+    },
+    {
+      "epoch": 0.02146,
+      "grad_norm": 0.9174606002788802,
+      "learning_rate": 0.003,
+      "loss": 4.1967,
+      "step": 2146
+    },
+    {
+      "epoch": 0.02147,
+      "grad_norm": 0.7660679608210765,
+      "learning_rate": 0.003,
+      "loss": 4.2109,
+      "step": 2147
+    },
+    {
+      "epoch": 0.02148,
+      "grad_norm": 0.7067244008901828,
+      "learning_rate": 0.003,
+      "loss": 4.222,
+      "step": 2148
+    },
+    {
+      "epoch": 0.02149,
+      "grad_norm": 0.5597922211015285,
+      "learning_rate": 0.003,
+      "loss": 4.1982,
+      "step": 2149
+    },
+    {
+      "epoch": 0.0215,
+      "grad_norm": 0.5887191266363829,
+      "learning_rate": 0.003,
+      "loss": 4.1847,
+      "step": 2150
+    },
+    {
+      "epoch": 0.02151,
+      "grad_norm": 0.5623719175027401,
+      "learning_rate": 0.003,
+      "loss": 4.1889,
+      "step": 2151
+    },
+    {
+      "epoch": 0.02152,
+      "grad_norm": 0.5432643275971011,
+      "learning_rate": 0.003,
+      "loss": 4.1639,
+      "step": 2152
+    },
+    {
+      "epoch": 0.02153,
+      "grad_norm": 0.5447875969469528,
+      "learning_rate": 0.003,
+      "loss": 4.1717,
+      "step": 2153
+    },
+    {
+      "epoch": 0.02154,
+      "grad_norm": 0.64393428597859,
+      "learning_rate": 0.003,
+      "loss": 4.1635,
+      "step": 2154
+    },
+    {
+      "epoch": 0.02155,
+      "grad_norm": 0.836540801508149,
+      "learning_rate": 0.003,
+      "loss": 4.1972,
+      "step": 2155
+    },
+    {
+      "epoch": 0.02156,
+      "grad_norm": 0.9453076222627308,
+      "learning_rate": 0.003,
+      "loss": 4.1938,
+      "step": 2156
+    },
+    {
+      "epoch": 0.02157,
+      "grad_norm": 0.7417036890833684,
+      "learning_rate": 0.003,
+      "loss": 4.1515,
+      "step": 2157
+    },
+    {
+      "epoch": 0.02158,
+      "grad_norm": 0.5957713091881504,
+      "learning_rate": 0.003,
+      "loss": 4.1686,
+      "step": 2158
+    },
+    {
+      "epoch": 0.02159,
+      "grad_norm": 0.6018851123999939,
+      "learning_rate": 0.003,
+      "loss": 4.1807,
+      "step": 2159
+    },
+    {
+      "epoch": 0.0216,
+      "grad_norm": 0.6716376115123553,
+      "learning_rate": 0.003,
+      "loss": 4.1831,
+      "step": 2160
+    },
+    {
+      "epoch": 0.02161,
+      "grad_norm": 0.7819649346338547,
+      "learning_rate": 0.003,
+      "loss": 4.1755,
+      "step": 2161
+    },
+    {
+      "epoch": 0.02162,
+      "grad_norm": 0.8233828232110464,
+      "learning_rate": 0.003,
+      "loss": 4.1801,
+      "step": 2162
+    },
+    {
+      "epoch": 0.02163,
+      "grad_norm": 0.7824568090505709,
+      "learning_rate": 0.003,
+      "loss": 4.1982,
+      "step": 2163
+    },
+    {
+      "epoch": 0.02164,
+      "grad_norm": 0.8597315749146158,
+      "learning_rate": 0.003,
+      "loss": 4.1915,
+      "step": 2164
+    },
+    {
+      "epoch": 0.02165,
+      "grad_norm": 0.7979744235864884,
+      "learning_rate": 0.003,
+      "loss": 4.1562,
+      "step": 2165
+    },
+    {
+      "epoch": 0.02166,
+      "grad_norm": 0.7759936363363831,
+      "learning_rate": 0.003,
+      "loss": 4.1947,
+      "step": 2166
+    },
+    {
+      "epoch": 0.02167,
+      "grad_norm": 0.7097277369414112,
+      "learning_rate": 0.003,
+      "loss": 4.1708,
+      "step": 2167
+    },
+    {
+      "epoch": 0.02168,
+      "grad_norm": 0.6129275664005919,
+      "learning_rate": 0.003,
+      "loss": 4.1674,
+      "step": 2168
+    },
+    {
+      "epoch": 0.02169,
+      "grad_norm": 0.5617202579327312,
+      "learning_rate": 0.003,
+      "loss": 4.1636,
+      "step": 2169
+    },
+    {
+      "epoch": 0.0217,
+      "grad_norm": 0.4729981033232719,
+      "learning_rate": 0.003,
+      "loss": 4.1693,
+      "step": 2170
+    },
+    {
+      "epoch": 0.02171,
+      "grad_norm": 0.5127457916731437,
+      "learning_rate": 0.003,
+      "loss": 4.1819,
+      "step": 2171
+    },
+    {
+      "epoch": 0.02172,
+      "grad_norm": 0.6304303564787295,
+      "learning_rate": 0.003,
+      "loss": 4.1556,
+      "step": 2172
+    },
+    {
+      "epoch": 0.02173,
+      "grad_norm": 0.7235970129567234,
+      "learning_rate": 0.003,
+      "loss": 4.1636,
+      "step": 2173
+    },
+    {
+      "epoch": 0.02174,
+      "grad_norm": 0.9537416454234164,
+      "learning_rate": 0.003,
+      "loss": 4.1668,
+      "step": 2174
+    },
+    {
+      "epoch": 0.02175,
+      "grad_norm": 1.0772501316127283,
+      "learning_rate": 0.003,
+      "loss": 4.2082,
+      "step": 2175
+    },
+    {
+      "epoch": 0.02176,
+      "grad_norm": 0.9320344556958656,
+      "learning_rate": 0.003,
+      "loss": 4.1798,
+      "step": 2176
+    },
+    {
+      "epoch": 0.02177,
+      "grad_norm": 1.2306562612139564,
+      "learning_rate": 0.003,
+      "loss": 4.2022,
+      "step": 2177
+    },
+    {
+      "epoch": 0.02178,
+      "grad_norm": 0.9649462703992993,
+      "learning_rate": 0.003,
+      "loss": 4.1822,
+      "step": 2178
+    },
+    {
+      "epoch": 0.02179,
+      "grad_norm": 0.8364440339114567,
+      "learning_rate": 0.003,
+      "loss": 4.1982,
+      "step": 2179
+    },
+    {
+      "epoch": 0.0218,
+      "grad_norm": 0.8494241782855642,
+      "learning_rate": 0.003,
+      "loss": 4.2094,
+      "step": 2180
+    },
+    {
+      "epoch": 0.02181,
+      "grad_norm": 0.8028341199053393,
+      "learning_rate": 0.003,
+      "loss": 4.2103,
+      "step": 2181
+    },
+    {
+      "epoch": 0.02182,
+      "grad_norm": 0.8789575016731874,
+      "learning_rate": 0.003,
+      "loss": 4.1991,
+      "step": 2182
+    },
+    {
+      "epoch": 0.02183,
+      "grad_norm": 0.7339272838818488,
+      "learning_rate": 0.003,
+      "loss": 4.2018,
+      "step": 2183
+    },
+    {
+      "epoch": 0.02184,
+      "grad_norm": 0.6992146186501603,
+      "learning_rate": 0.003,
+      "loss": 4.1762,
+      "step": 2184
+    },
+    {
+      "epoch": 0.02185,
+      "grad_norm": 0.6859058633396239,
+      "learning_rate": 0.003,
+      "loss": 4.1699,
+      "step": 2185
+    },
+    {
+      "epoch": 0.02186,
+      "grad_norm": 0.671595445918346,
+      "learning_rate": 0.003,
+      "loss": 4.186,
+      "step": 2186
+    },
+    {
+      "epoch": 0.02187,
+      "grad_norm": 0.6535925166183008,
+      "learning_rate": 0.003,
+      "loss": 4.1812,
+      "step": 2187
+    },
+    {
+      "epoch": 0.02188,
+      "grad_norm": 0.6198661745230879,
+      "learning_rate": 0.003,
+      "loss": 4.1584,
+      "step": 2188
+    },
+    {
+      "epoch": 0.02189,
+      "grad_norm": 0.5776255514929863,
+      "learning_rate": 0.003,
+      "loss": 4.1869,
+      "step": 2189
+    },
+    {
+      "epoch": 0.0219,
+      "grad_norm": 0.7433015793364336,
+      "learning_rate": 0.003,
+      "loss": 4.1787,
+      "step": 2190
+    },
+    {
+      "epoch": 0.02191,
+      "grad_norm": 0.8514656528184604,
+      "learning_rate": 0.003,
+      "loss": 4.1806,
+      "step": 2191
+    },
+    {
+      "epoch": 0.02192,
+      "grad_norm": 0.9191452260055095,
+      "learning_rate": 0.003,
+      "loss": 4.2122,
+      "step": 2192
+    },
+    {
+      "epoch": 0.02193,
+      "grad_norm": 0.7405436618584794,
+      "learning_rate": 0.003,
+      "loss": 4.1726,
+      "step": 2193
+    },
+    {
+      "epoch": 0.02194,
+      "grad_norm": 0.7140573914846811,
+      "learning_rate": 0.003,
+      "loss": 4.2215,
+      "step": 2194
+    },
+    {
+      "epoch": 0.02195,
+      "grad_norm": 0.7595143608079454,
+      "learning_rate": 0.003,
+      "loss": 4.1859,
+      "step": 2195
+    },
+    {
+      "epoch": 0.02196,
+      "grad_norm": 0.7549194736868536,
+      "learning_rate": 0.003,
+      "loss": 4.1951,
+      "step": 2196
+    },
+    {
+      "epoch": 0.02197,
+      "grad_norm": 0.6975555957838411,
+      "learning_rate": 0.003,
+      "loss": 4.1831,
+      "step": 2197
+    },
+    {
+      "epoch": 0.02198,
+      "grad_norm": 0.7595090640426224,
+      "learning_rate": 0.003,
+      "loss": 4.1756,
+      "step": 2198
+    },
+    {
+      "epoch": 0.02199,
+      "grad_norm": 0.7642539057941399,
+      "learning_rate": 0.003,
+      "loss": 4.145,
+      "step": 2199
+    },
+    {
+      "epoch": 0.022,
+      "grad_norm": 0.7482736596797179,
+      "learning_rate": 0.003,
+      "loss": 4.1698,
+      "step": 2200
+    },
+    {
+      "epoch": 0.02201,
+      "grad_norm": 0.6545018698949331,
+      "learning_rate": 0.003,
+      "loss": 4.1824,
+      "step": 2201
+    },
+    {
+      "epoch": 0.02202,
+      "grad_norm": 0.6269777415335018,
+      "learning_rate": 0.003,
+      "loss": 4.1683,
+      "step": 2202
+    },
+    {
+      "epoch": 0.02203,
+      "grad_norm": 0.7202806939294419,
+      "learning_rate": 0.003,
+      "loss": 4.1965,
+      "step": 2203
+    },
+    {
+      "epoch": 0.02204,
+      "grad_norm": 0.7354393023905447,
+      "learning_rate": 0.003,
+      "loss": 4.1919,
+      "step": 2204
+    },
+    {
+      "epoch": 0.02205,
+      "grad_norm": 0.6658405305239573,
+      "learning_rate": 0.003,
+      "loss": 4.1888,
+      "step": 2205
+    },
+    {
+      "epoch": 0.02206,
+      "grad_norm": 0.5494433484311174,
+      "learning_rate": 0.003,
+      "loss": 4.1509,
+      "step": 2206
+    },
+    {
+      "epoch": 0.02207,
+      "grad_norm": 0.48191881483377974,
+      "learning_rate": 0.003,
+      "loss": 4.1617,
+      "step": 2207
+    },
+    {
+      "epoch": 0.02208,
+      "grad_norm": 0.5055031144775365,
+      "learning_rate": 0.003,
+      "loss": 4.1419,
+      "step": 2208
+    },
+    {
+      "epoch": 0.02209,
+      "grad_norm": 0.5328357421096614,
+      "learning_rate": 0.003,
+      "loss": 4.1587,
+      "step": 2209
+    },
+    {
+      "epoch": 0.0221,
+      "grad_norm": 0.580303567033314,
+      "learning_rate": 0.003,
+      "loss": 4.1426,
+      "step": 2210
+    },
+    {
+      "epoch": 0.02211,
+      "grad_norm": 0.627373370176424,
+      "learning_rate": 0.003,
+      "loss": 4.1541,
+      "step": 2211
+    },
+    {
+      "epoch": 0.02212,
+      "grad_norm": 0.8017723022310775,
+      "learning_rate": 0.003,
+      "loss": 4.1578,
+      "step": 2212
+    },
+    {
+      "epoch": 0.02213,
+      "grad_norm": 0.9755586674162295,
+      "learning_rate": 0.003,
+      "loss": 4.1762,
+      "step": 2213
+    },
+    {
+      "epoch": 0.02214,
+      "grad_norm": 0.8888070506530766,
+      "learning_rate": 0.003,
+      "loss": 4.1705,
+      "step": 2214
+    },
+    {
+      "epoch": 0.02215,
+      "grad_norm": 0.8341073466106758,
+      "learning_rate": 0.003,
+      "loss": 4.173,
+      "step": 2215
+    },
+    {
+      "epoch": 0.02216,
+      "grad_norm": 0.8721558185443227,
+      "learning_rate": 0.003,
+      "loss": 4.1552,
+      "step": 2216
+    },
+    {
+      "epoch": 0.02217,
+      "grad_norm": 0.9058617906836595,
+      "learning_rate": 0.003,
+      "loss": 4.1535,
+      "step": 2217
+    },
+    {
+      "epoch": 0.02218,
+      "grad_norm": 0.8239137730922835,
+      "learning_rate": 0.003,
+      "loss": 4.1865,
+      "step": 2218
+    },
+    {
+      "epoch": 0.02219,
+      "grad_norm": 0.7662188590247174,
+      "learning_rate": 0.003,
+      "loss": 4.1744,
+      "step": 2219
+    },
+    {
+      "epoch": 0.0222,
+      "grad_norm": 0.7276128620873287,
+      "learning_rate": 0.003,
+      "loss": 4.1721,
+      "step": 2220
+    },
+    {
+      "epoch": 0.02221,
+      "grad_norm": 0.8153838699769882,
+      "learning_rate": 0.003,
+      "loss": 4.1618,
+      "step": 2221
+    },
+    {
+      "epoch": 0.02222,
+      "grad_norm": 0.9239982401134911,
+      "learning_rate": 0.003,
+      "loss": 4.1621,
+      "step": 2222
+    },
+    {
+      "epoch": 0.02223,
+      "grad_norm": 0.9248509160902595,
+      "learning_rate": 0.003,
+      "loss": 4.1826,
+      "step": 2223
+    },
+    {
+      "epoch": 0.02224,
+      "grad_norm": 0.8778359853731296,
+      "learning_rate": 0.003,
+      "loss": 4.1633,
+      "step": 2224
+    },
+    {
+      "epoch": 0.02225,
+      "grad_norm": 1.1133790365277945,
+      "learning_rate": 0.003,
+      "loss": 4.1791,
+      "step": 2225
+    },
+    {
+      "epoch": 0.02226,
+      "grad_norm": 0.9357923626997176,
+      "learning_rate": 0.003,
+      "loss": 4.1895,
+      "step": 2226
+    },
+    {
+      "epoch": 0.02227,
+      "grad_norm": 0.8646361690518631,
+      "learning_rate": 0.003,
+      "loss": 4.183,
+      "step": 2227
+    },
+    {
+      "epoch": 0.02228,
+      "grad_norm": 0.938807129740724,
+      "learning_rate": 0.003,
+      "loss": 4.1718,
+      "step": 2228
+    },
+    {
+      "epoch": 0.02229,
+      "grad_norm": 0.8004751977002076,
+      "learning_rate": 0.003,
+      "loss": 4.149,
+      "step": 2229
+    },
+    {
+      "epoch": 0.0223,
+      "grad_norm": 0.7882101721343657,
+      "learning_rate": 0.003,
+      "loss": 4.1858,
+      "step": 2230
+    },
+    {
+      "epoch": 0.02231,
+      "grad_norm": 0.7274707205896823,
+      "learning_rate": 0.003,
+      "loss": 4.1896,
+      "step": 2231
+    },
+    {
+      "epoch": 0.02232,
+      "grad_norm": 0.8291412822325788,
+      "learning_rate": 0.003,
+      "loss": 4.2,
+      "step": 2232
+    },
+    {
+      "epoch": 0.02233,
+      "grad_norm": 0.8063025430085007,
+      "learning_rate": 0.003,
+      "loss": 4.166,
+      "step": 2233
+    },
+    {
+      "epoch": 0.02234,
+      "grad_norm": 0.6911537576664851,
+      "learning_rate": 0.003,
+      "loss": 4.1865,
+      "step": 2234
+    },
+    {
+      "epoch": 0.02235,
+      "grad_norm": 0.7448703016841659,
+      "learning_rate": 0.003,
+      "loss": 4.2113,
+      "step": 2235
+    },
+    {
+      "epoch": 0.02236,
+      "grad_norm": 0.8233234836151326,
+      "learning_rate": 0.003,
+      "loss": 4.1655,
+      "step": 2236
+    },
+    {
+      "epoch": 0.02237,
+      "grad_norm": 1.0093335974280213,
+      "learning_rate": 0.003,
+      "loss": 4.2052,
+      "step": 2237
+    },
+    {
+      "epoch": 0.02238,
+      "grad_norm": 1.1018651279861302,
+      "learning_rate": 0.003,
+      "loss": 4.196,
+      "step": 2238
+    },
+    {
+      "epoch": 0.02239,
+      "grad_norm": 0.7048103914820842,
+      "learning_rate": 0.003,
+      "loss": 4.1668,
+      "step": 2239
+    },
+    {
+      "epoch": 0.0224,
+      "grad_norm": 0.6705866907364436,
+      "learning_rate": 0.003,
+      "loss": 4.1477,
+      "step": 2240
+    },
+    {
+      "epoch": 0.02241,
+      "grad_norm": 0.768034661346772,
+      "learning_rate": 0.003,
+      "loss": 4.1807,
+      "step": 2241
+    },
+    {
+      "epoch": 0.02242,
+      "grad_norm": 0.7013024117659296,
+      "learning_rate": 0.003,
+      "loss": 4.1591,
+      "step": 2242
+    },
+    {
+      "epoch": 0.02243,
+      "grad_norm": 0.8407412837981002,
+      "learning_rate": 0.003,
+      "loss": 4.1937,
+      "step": 2243
+    },
+    {
+      "epoch": 0.02244,
+      "grad_norm": 0.8584973105281422,
+      "learning_rate": 0.003,
+      "loss": 4.1731,
+      "step": 2244
+    },
+    {
+      "epoch": 0.02245,
+      "grad_norm": 0.810849020894934,
+      "learning_rate": 0.003,
+      "loss": 4.1578,
+      "step": 2245
+    },
+    {
+      "epoch": 0.02246,
+      "grad_norm": 0.7441445696162579,
+      "learning_rate": 0.003,
+      "loss": 4.191,
+      "step": 2246
+    },
+    {
+      "epoch": 0.02247,
+      "grad_norm": 0.719895099061681,
+      "learning_rate": 0.003,
+      "loss": 4.162,
+      "step": 2247
+    },
+    {
+      "epoch": 0.02248,
+      "grad_norm": 0.6276605133960077,
+      "learning_rate": 0.003,
+      "loss": 4.1652,
+      "step": 2248
+    },
+    {
+      "epoch": 0.02249,
+      "grad_norm": 0.5820105403717251,
+      "learning_rate": 0.003,
+      "loss": 4.1561,
+      "step": 2249
+    },
+    {
+      "epoch": 0.0225,
+      "grad_norm": 0.494797181910464,
+      "learning_rate": 0.003,
+      "loss": 4.1646,
+      "step": 2250
+    },
+    {
+      "epoch": 0.02251,
+      "grad_norm": 0.45799261155941245,
+      "learning_rate": 0.003,
+      "loss": 4.1881,
+      "step": 2251
+    },
+    {
+      "epoch": 0.02252,
+      "grad_norm": 0.4312165334190654,
+      "learning_rate": 0.003,
+      "loss": 4.1745,
+      "step": 2252
+    },
+    {
+      "epoch": 0.02253,
+      "grad_norm": 0.4089956454491447,
+      "learning_rate": 0.003,
+      "loss": 4.1547,
+      "step": 2253
+    },
+    {
+      "epoch": 0.02254,
+      "grad_norm": 0.41711525641558334,
+      "learning_rate": 0.003,
+      "loss": 4.1591,
+      "step": 2254
+    },
+    {
+      "epoch": 0.02255,
+      "grad_norm": 0.39887312531471364,
+      "learning_rate": 0.003,
+      "loss": 4.1419,
+      "step": 2255
+    },
+    {
+      "epoch": 0.02256,
+      "grad_norm": 0.44922284802782086,
+      "learning_rate": 0.003,
+      "loss": 4.1321,
+      "step": 2256
+    },
+    {
+      "epoch": 0.02257,
+      "grad_norm": 0.43881289908027915,
+      "learning_rate": 0.003,
+      "loss": 4.1476,
+      "step": 2257
+    },
+    {
+      "epoch": 0.02258,
+      "grad_norm": 0.42711858762354266,
+      "learning_rate": 0.003,
+      "loss": 4.1569,
+      "step": 2258
+    },
+    {
+      "epoch": 0.02259,
+      "grad_norm": 0.4761674249696505,
+      "learning_rate": 0.003,
+      "loss": 4.1182,
+      "step": 2259
+    },
+    {
+      "epoch": 0.0226,
+      "grad_norm": 0.6172223177216527,
+      "learning_rate": 0.003,
+      "loss": 4.1745,
+      "step": 2260
+    },
+    {
+      "epoch": 0.02261,
+      "grad_norm": 0.9969811836278653,
+      "learning_rate": 0.003,
+      "loss": 4.1626,
+      "step": 2261
+    },
+    {
+      "epoch": 0.02262,
+      "grad_norm": 1.4207495716773118,
+      "learning_rate": 0.003,
+      "loss": 4.1955,
+      "step": 2262
+    },
+    {
+      "epoch": 0.02263,
+      "grad_norm": 0.5107196284950156,
+      "learning_rate": 0.003,
+      "loss": 4.1749,
+      "step": 2263
+    },
+    {
+      "epoch": 0.02264,
+      "grad_norm": 0.8124315452603879,
+      "learning_rate": 0.003,
+      "loss": 4.1794,
+      "step": 2264
+    },
+    {
+      "epoch": 0.02265,
+      "grad_norm": 0.7127120804095198,
+      "learning_rate": 0.003,
+      "loss": 4.1342,
+      "step": 2265
+    },
+    {
+      "epoch": 0.02266,
+      "grad_norm": 0.5120142831669139,
+      "learning_rate": 0.003,
+      "loss": 4.142,
+      "step": 2266
+    },
+    {
+      "epoch": 0.02267,
+      "grad_norm": 0.842715736214197,
+      "learning_rate": 0.003,
+      "loss": 4.1775,
+      "step": 2267
+    },
+    {
+      "epoch": 0.02268,
+      "grad_norm": 0.8370937405428989,
+      "learning_rate": 0.003,
+      "loss": 4.1593,
+      "step": 2268
+    },
+    {
+      "epoch": 0.02269,
+      "grad_norm": 0.7173617092756458,
+      "learning_rate": 0.003,
+      "loss": 4.174,
+      "step": 2269
+    },
+    {
+      "epoch": 0.0227,
+      "grad_norm": 0.8926880986379259,
+      "learning_rate": 0.003,
+      "loss": 4.1781,
+      "step": 2270
+    },
+    {
+      "epoch": 0.02271,
+      "grad_norm": 0.9394293424718215,
+      "learning_rate": 0.003,
+      "loss": 4.1873,
+      "step": 2271
+    },
+    {
+      "epoch": 0.02272,
+      "grad_norm": 0.9564097717489924,
+      "learning_rate": 0.003,
+      "loss": 4.1878,
+      "step": 2272
+    },
+    {
+      "epoch": 0.02273,
+      "grad_norm": 1.2112473190186792,
+      "learning_rate": 0.003,
+      "loss": 4.1998,
+      "step": 2273
+    },
+    {
+      "epoch": 0.02274,
+      "grad_norm": 0.9744415053378092,
+      "learning_rate": 0.003,
+      "loss": 4.1786,
+      "step": 2274
+    },
+    {
+      "epoch": 0.02275,
+      "grad_norm": 1.0078841940550498,
+      "learning_rate": 0.003,
+      "loss": 4.1796,
+      "step": 2275
+    },
+    {
+      "epoch": 0.02276,
+      "grad_norm": 0.8300538512961302,
+      "learning_rate": 0.003,
+      "loss": 4.1696,
+      "step": 2276
+    },
+    {
+      "epoch": 0.02277,
+      "grad_norm": 0.6643966250880426,
+      "learning_rate": 0.003,
+      "loss": 4.1719,
+      "step": 2277
+    },
+    {
+      "epoch": 0.02278,
+      "grad_norm": 0.6190627183036216,
+      "learning_rate": 0.003,
+      "loss": 4.1782,
+      "step": 2278
+    },
+    {
+      "epoch": 0.02279,
+      "grad_norm": 0.551131169249495,
+      "learning_rate": 0.003,
+      "loss": 4.1827,
+      "step": 2279
+    },
+    {
+      "epoch": 0.0228,
+      "grad_norm": 0.5572806608313633,
+      "learning_rate": 0.003,
+      "loss": 4.173,
+      "step": 2280
+    },
+    {
+      "epoch": 0.02281,
+      "grad_norm": 0.630640256246517,
+      "learning_rate": 0.003,
+      "loss": 4.1688,
+      "step": 2281
+    },
+    {
+      "epoch": 0.02282,
+      "grad_norm": 0.667161853354935,
+      "learning_rate": 0.003,
+      "loss": 4.181,
+      "step": 2282
+    },
+    {
+      "epoch": 0.02283,
+      "grad_norm": 0.7804313255347314,
+      "learning_rate": 0.003,
+      "loss": 4.1982,
+      "step": 2283
+    },
+    {
+      "epoch": 0.02284,
+      "grad_norm": 0.8174062173017085,
+      "learning_rate": 0.003,
+      "loss": 4.1998,
+      "step": 2284
+    },
+    {
+      "epoch": 0.02285,
+      "grad_norm": 0.8933970302021632,
+      "learning_rate": 0.003,
+      "loss": 4.147,
+      "step": 2285
+    },
+    {
+      "epoch": 0.02286,
+      "grad_norm": 0.8583328769415308,
+      "learning_rate": 0.003,
+      "loss": 4.2039,
+      "step": 2286
+    },
+    {
+      "epoch": 0.02287,
+      "grad_norm": 0.8734810759184407,
+      "learning_rate": 0.003,
+      "loss": 4.1855,
+      "step": 2287
+    },
+    {
+      "epoch": 0.02288,
+      "grad_norm": 0.8928624064077537,
+      "learning_rate": 0.003,
+      "loss": 4.2123,
+      "step": 2288
+    },
+    {
+      "epoch": 0.02289,
+      "grad_norm": 0.783879288737337,
+      "learning_rate": 0.003,
+      "loss": 4.2056,
+      "step": 2289
+    },
+    {
+      "epoch": 0.0229,
+      "grad_norm": 0.7701279568768081,
+      "learning_rate": 0.003,
+      "loss": 4.1809,
+      "step": 2290
+    },
+    {
+      "epoch": 0.02291,
+      "grad_norm": 0.7353904143222354,
+      "learning_rate": 0.003,
+      "loss": 4.1585,
+      "step": 2291
+    },
+    {
+      "epoch": 0.02292,
+      "grad_norm": 0.5673182764651727,
+      "learning_rate": 0.003,
+      "loss": 4.1467,
+      "step": 2292
+    },
+    {
+      "epoch": 0.02293,
+      "grad_norm": 0.6211445071442583,
+      "learning_rate": 0.003,
+      "loss": 4.1508,
+      "step": 2293
+    },
+    {
+      "epoch": 0.02294,
+      "grad_norm": 0.5897432424358218,
+      "learning_rate": 0.003,
+      "loss": 4.1497,
+      "step": 2294
+    },
+    {
+      "epoch": 0.02295,
+      "grad_norm": 0.6701151196414159,
+      "learning_rate": 0.003,
+      "loss": 4.147,
+      "step": 2295
+    },
+    {
+      "epoch": 0.02296,
+      "grad_norm": 1.0101216354387503,
+      "learning_rate": 0.003,
+      "loss": 4.1897,
+      "step": 2296
+    },
+    {
+      "epoch": 0.02297,
+      "grad_norm": 1.2406081319623274,
+      "learning_rate": 0.003,
+      "loss": 4.1822,
+      "step": 2297
+    },
+    {
+      "epoch": 0.02298,
+      "grad_norm": 0.5988120231143333,
+      "learning_rate": 0.003,
+      "loss": 4.1702,
+      "step": 2298
+    },
+    {
+      "epoch": 0.02299,
+      "grad_norm": 0.7795860072160454,
+      "learning_rate": 0.003,
+      "loss": 4.1804,
+      "step": 2299
+    },
+    {
+      "epoch": 0.023,
+      "grad_norm": 0.784959258488215,
+      "learning_rate": 0.003,
+      "loss": 4.1984,
+      "step": 2300
+    },
+    {
+      "epoch": 0.02301,
+      "grad_norm": 0.5879362613709453,
+      "learning_rate": 0.003,
+      "loss": 4.157,
+      "step": 2301
+    },
+    {
+      "epoch": 0.02302,
+      "grad_norm": 0.7106606864619079,
+      "learning_rate": 0.003,
+      "loss": 4.1649,
+      "step": 2302
+    },
+    {
+      "epoch": 0.02303,
+      "grad_norm": 0.7402995000062723,
+      "learning_rate": 0.003,
+      "loss": 4.1545,
+      "step": 2303
+    },
+    {
+      "epoch": 0.02304,
+      "grad_norm": 0.8291546359347778,
+      "learning_rate": 0.003,
+      "loss": 4.1898,
+      "step": 2304
+    },
+    {
+      "epoch": 0.02305,
+      "grad_norm": 0.753148855618092,
+      "learning_rate": 0.003,
+      "loss": 4.1615,
+      "step": 2305
+    },
+    {
+      "epoch": 0.02306,
+      "grad_norm": 0.8869246970763861,
+      "learning_rate": 0.003,
+      "loss": 4.1444,
+      "step": 2306
+    },
+    {
+      "epoch": 0.02307,
+      "grad_norm": 0.8925578818662347,
+      "learning_rate": 0.003,
+      "loss": 4.1758,
+      "step": 2307
+    },
+    {
+      "epoch": 0.02308,
+      "grad_norm": 0.9066759173397617,
+      "learning_rate": 0.003,
+      "loss": 4.1762,
+      "step": 2308
+    },
+    {
+      "epoch": 0.02309,
+      "grad_norm": 0.8201889707383639,
+      "learning_rate": 0.003,
+      "loss": 4.1952,
+      "step": 2309
+    },
+    {
+      "epoch": 0.0231,
+      "grad_norm": 0.7904644702923331,
+      "learning_rate": 0.003,
+      "loss": 4.1751,
+      "step": 2310
+    },
+    {
+      "epoch": 0.02311,
+      "grad_norm": 0.7060518287169474,
+      "learning_rate": 0.003,
+      "loss": 4.1776,
+      "step": 2311
+    },
+    {
+      "epoch": 0.02312,
+      "grad_norm": 0.7392874926033767,
+      "learning_rate": 0.003,
+      "loss": 4.1519,
+      "step": 2312
+    },
+    {
+      "epoch": 0.02313,
+      "grad_norm": 0.8538999440765905,
+      "learning_rate": 0.003,
+      "loss": 4.1529,
+      "step": 2313
+    },
+    {
+      "epoch": 0.02314,
+      "grad_norm": 0.9312222422709578,
+      "learning_rate": 0.003,
+      "loss": 4.1718,
+      "step": 2314
+    },
+    {
+      "epoch": 0.02315,
+      "grad_norm": 0.9687214234200097,
+      "learning_rate": 0.003,
+      "loss": 4.1771,
+      "step": 2315
+    },
+    {
+      "epoch": 0.02316,
+      "grad_norm": 0.891697328396861,
+      "learning_rate": 0.003,
+      "loss": 4.1909,
+      "step": 2316
+    },
+    {
+      "epoch": 0.02317,
+      "grad_norm": 0.897934373295407,
+      "learning_rate": 0.003,
+      "loss": 4.1753,
+      "step": 2317
+    },
+    {
+      "epoch": 0.02318,
+      "grad_norm": 0.8051331418091766,
+      "learning_rate": 0.003,
+      "loss": 4.1614,
+      "step": 2318
+    },
+    {
+      "epoch": 0.02319,
+      "grad_norm": 0.703806089932542,
+      "learning_rate": 0.003,
+      "loss": 4.1804,
+      "step": 2319
+    },
+    {
+      "epoch": 0.0232,
+      "grad_norm": 0.7388332818016227,
+      "learning_rate": 0.003,
+      "loss": 4.1898,
+      "step": 2320
+    },
+    {
+      "epoch": 0.02321,
+      "grad_norm": 0.7450344205679207,
+      "learning_rate": 0.003,
+      "loss": 4.1692,
+      "step": 2321
+    },
+    {
+      "epoch": 0.02322,
+      "grad_norm": 0.6660500555618327,
+      "learning_rate": 0.003,
+      "loss": 4.1746,
+      "step": 2322
+    },
+    {
+      "epoch": 0.02323,
+      "grad_norm": 0.6571661630697367,
+      "learning_rate": 0.003,
+      "loss": 4.1481,
+      "step": 2323
+    },
+    {
+      "epoch": 0.02324,
+      "grad_norm": 0.6161646162269099,
+      "learning_rate": 0.003,
+      "loss": 4.1462,
+      "step": 2324
+    },
+    {
+      "epoch": 0.02325,
+      "grad_norm": 0.5919512249258755,
+      "learning_rate": 0.003,
+      "loss": 4.1679,
+      "step": 2325
+    },
+    {
+      "epoch": 0.02326,
+      "grad_norm": 0.7224558633075708,
+      "learning_rate": 0.003,
+      "loss": 4.1831,
+      "step": 2326
+    },
+    {
+      "epoch": 0.02327,
+      "grad_norm": 0.8816178665411202,
+      "learning_rate": 0.003,
+      "loss": 4.1759,
+      "step": 2327
+    },
+    {
+      "epoch": 0.02328,
+      "grad_norm": 0.8699762426391761,
+      "learning_rate": 0.003,
+      "loss": 4.176,
+      "step": 2328
+    },
+    {
+      "epoch": 0.02329,
+      "grad_norm": 0.723960245924297,
+      "learning_rate": 0.003,
+      "loss": 4.1606,
+      "step": 2329
+    },
+    {
+      "epoch": 0.0233,
+      "grad_norm": 0.7628023637683098,
+      "learning_rate": 0.003,
+      "loss": 4.1563,
+      "step": 2330
+    },
+    {
+      "epoch": 0.02331,
+      "grad_norm": 0.7695226471244487,
+      "learning_rate": 0.003,
+      "loss": 4.1933,
+      "step": 2331
+    },
+    {
+      "epoch": 0.02332,
+      "grad_norm": 0.7712606828136085,
+      "learning_rate": 0.003,
+      "loss": 4.1567,
+      "step": 2332
+    },
+    {
+      "epoch": 0.02333,
+      "grad_norm": 0.7181228868994012,
+      "learning_rate": 0.003,
+      "loss": 4.1639,
+      "step": 2333
+    },
+    {
+      "epoch": 0.02334,
+      "grad_norm": 0.6385757918578437,
+      "learning_rate": 0.003,
+      "loss": 4.1304,
+      "step": 2334
+    },
+    {
+      "epoch": 0.02335,
+      "grad_norm": 0.5804913608133354,
+      "learning_rate": 0.003,
+      "loss": 4.1671,
+      "step": 2335
+    },
+    {
+      "epoch": 0.02336,
+      "grad_norm": 0.6406948035907543,
+      "learning_rate": 0.003,
+      "loss": 4.1601,
+      "step": 2336
+    },
+    {
+      "epoch": 0.02337,
+      "grad_norm": 0.7550291811921531,
+      "learning_rate": 0.003,
+      "loss": 4.1668,
+      "step": 2337
+    },
+    {
+      "epoch": 0.02338,
+      "grad_norm": 0.867264165349628,
+      "learning_rate": 0.003,
+      "loss": 4.1574,
+      "step": 2338
+    },
+    {
+      "epoch": 0.02339,
+      "grad_norm": 0.9370869698358649,
+      "learning_rate": 0.003,
+      "loss": 4.1596,
+      "step": 2339
+    },
+    {
+      "epoch": 0.0234,
+      "grad_norm": 1.04114552861465,
+      "learning_rate": 0.003,
+      "loss": 4.1762,
+      "step": 2340
+    },
+    {
+      "epoch": 0.02341,
+      "grad_norm": 0.8419443125713769,
+      "learning_rate": 0.003,
+      "loss": 4.1461,
+      "step": 2341
+    },
+    {
+      "epoch": 0.02342,
+      "grad_norm": 0.7516609856190137,
+      "learning_rate": 0.003,
+      "loss": 4.1608,
+      "step": 2342
+    },
+    {
+      "epoch": 0.02343,
+      "grad_norm": 0.7840218113502367,
+      "learning_rate": 0.003,
+      "loss": 4.1597,
+      "step": 2343
+    },
+    {
+      "epoch": 0.02344,
+      "grad_norm": 0.7106909207560747,
+      "learning_rate": 0.003,
+      "loss": 4.1579,
+      "step": 2344
+    },
+    {
+      "epoch": 0.02345,
+      "grad_norm": 0.695743479378358,
+      "learning_rate": 0.003,
+      "loss": 4.1505,
+      "step": 2345
+    },
+    {
+      "epoch": 0.02346,
+      "grad_norm": 0.7618556711999727,
+      "learning_rate": 0.003,
+      "loss": 4.1514,
+      "step": 2346
+    },
+    {
+      "epoch": 0.02347,
+      "grad_norm": 0.8494779389825688,
+      "learning_rate": 0.003,
+      "loss": 4.1923,
+      "step": 2347
+    },
+    {
+      "epoch": 0.02348,
+      "grad_norm": 0.8819304188600076,
+      "learning_rate": 0.003,
+      "loss": 4.1614,
+      "step": 2348
+    },
+    {
+      "epoch": 0.02349,
+      "grad_norm": 0.801874189029763,
+      "learning_rate": 0.003,
+      "loss": 4.1729,
+      "step": 2349
+    },
+    {
+      "epoch": 0.0235,
+      "grad_norm": 0.7502001194200574,
+      "learning_rate": 0.003,
+      "loss": 4.168,
+      "step": 2350
+    },
+    {
+      "epoch": 0.02351,
+      "grad_norm": 0.7987799293083814,
+      "learning_rate": 0.003,
+      "loss": 4.1781,
+      "step": 2351
+    },
+    {
+      "epoch": 0.02352,
+      "grad_norm": 0.9306705411998242,
+      "learning_rate": 0.003,
+      "loss": 4.142,
+      "step": 2352
+    },
+    {
+      "epoch": 0.02353,
+      "grad_norm": 0.8564501775467426,
+      "learning_rate": 0.003,
+      "loss": 4.1759,
+      "step": 2353
+    },
+    {
+      "epoch": 0.02354,
+      "grad_norm": 0.8345109850380096,
+      "learning_rate": 0.003,
+      "loss": 4.1813,
+      "step": 2354
+    },
+    {
+      "epoch": 0.02355,
+      "grad_norm": 0.7887172745864481,
+      "learning_rate": 0.003,
+      "loss": 4.1723,
+      "step": 2355
+    },
+    {
+      "epoch": 0.02356,
+      "grad_norm": 0.6582873563724112,
+      "learning_rate": 0.003,
+      "loss": 4.1515,
+      "step": 2356
+    },
+    {
+      "epoch": 0.02357,
+      "grad_norm": 0.6737457349000578,
+      "learning_rate": 0.003,
+      "loss": 4.1517,
+      "step": 2357
+    },
+    {
+      "epoch": 0.02358,
+      "grad_norm": 0.7705872627309706,
+      "learning_rate": 0.003,
+      "loss": 4.1742,
+      "step": 2358
+    },
+    {
+      "epoch": 0.02359,
+      "grad_norm": 0.8542556066081018,
+      "learning_rate": 0.003,
+      "loss": 4.1536,
+      "step": 2359
+    },
+    {
+      "epoch": 0.0236,
+      "grad_norm": 0.9842191585501165,
+      "learning_rate": 0.003,
+      "loss": 4.2032,
+      "step": 2360
+    },
+    {
+      "epoch": 0.02361,
+      "grad_norm": 0.9112511096880844,
+      "learning_rate": 0.003,
+      "loss": 4.1793,
+      "step": 2361
+    },
+    {
+      "epoch": 0.02362,
+      "grad_norm": 0.9278874197656558,
+      "learning_rate": 0.003,
+      "loss": 4.1712,
+      "step": 2362
+    },
+    {
+      "epoch": 0.02363,
+      "grad_norm": 0.7482616982968588,
+      "learning_rate": 0.003,
+      "loss": 4.1444,
+      "step": 2363
+    },
+    {
+      "epoch": 0.02364,
+      "grad_norm": 0.5981870727991454,
+      "learning_rate": 0.003,
+      "loss": 4.1597,
+      "step": 2364
+    },
+    {
+      "epoch": 0.02365,
+      "grad_norm": 0.6120042832874062,
+      "learning_rate": 0.003,
+      "loss": 4.1788,
+      "step": 2365
+    },
+    {
+      "epoch": 0.02366,
+      "grad_norm": 0.592965390221233,
+      "learning_rate": 0.003,
+      "loss": 4.1489,
+      "step": 2366
+    },
+    {
+      "epoch": 0.02367,
+      "grad_norm": 0.5967931209483315,
+      "learning_rate": 0.003,
+      "loss": 4.1579,
+      "step": 2367
+    },
+    {
+      "epoch": 0.02368,
+      "grad_norm": 0.6289890460021353,
+      "learning_rate": 0.003,
+      "loss": 4.1346,
+      "step": 2368
+    },
+    {
+      "epoch": 0.02369,
+      "grad_norm": 0.6580991252391354,
+      "learning_rate": 0.003,
+      "loss": 4.1441,
+      "step": 2369
+    },
+    {
+      "epoch": 0.0237,
+      "grad_norm": 0.6945790129958274,
+      "learning_rate": 0.003,
+      "loss": 4.1741,
+      "step": 2370
+    },
+    {
+      "epoch": 0.02371,
+      "grad_norm": 0.7107030962570803,
+      "learning_rate": 0.003,
+      "loss": 4.1569,
+      "step": 2371
+    },
+    {
+      "epoch": 0.02372,
+      "grad_norm": 0.8059296050081108,
+      "learning_rate": 0.003,
+      "loss": 4.149,
+      "step": 2372
+    },
+    {
+      "epoch": 0.02373,
+      "grad_norm": 0.921257026362051,
+      "learning_rate": 0.003,
+      "loss": 4.1414,
+      "step": 2373
+    },
+    {
+      "epoch": 0.02374,
+      "grad_norm": 0.955728704961619,
+      "learning_rate": 0.003,
+      "loss": 4.1544,
+      "step": 2374
+    },
+    {
+      "epoch": 0.02375,
+      "grad_norm": 0.7504286137015724,
+      "learning_rate": 0.003,
+      "loss": 4.15,
+      "step": 2375
+    },
+    {
+      "epoch": 0.02376,
+      "grad_norm": 0.7478781963883802,
+      "learning_rate": 0.003,
+      "loss": 4.1724,
+      "step": 2376
+    },
+    {
+      "epoch": 0.02377,
+      "grad_norm": 0.917428121872858,
+      "learning_rate": 0.003,
+      "loss": 4.1486,
+      "step": 2377
+    },
+    {
+      "epoch": 0.02378,
+      "grad_norm": 0.7822980127349399,
+      "learning_rate": 0.003,
+      "loss": 4.1546,
+      "step": 2378
+    },
+    {
+      "epoch": 0.02379,
+      "grad_norm": 0.5863463150077827,
+      "learning_rate": 0.003,
+      "loss": 4.136,
+      "step": 2379
+    },
+    {
+      "epoch": 0.0238,
+      "grad_norm": 0.5340016137538751,
+      "learning_rate": 0.003,
+      "loss": 4.109,
+      "step": 2380
+    },
+    {
+      "epoch": 0.02381,
+      "grad_norm": 0.6128857312553131,
+      "learning_rate": 0.003,
+      "loss": 4.1229,
+      "step": 2381
+    },
+    {
+      "epoch": 0.02382,
+      "grad_norm": 0.5658688824938058,
+      "learning_rate": 0.003,
+      "loss": 4.1779,
+      "step": 2382
+    },
+    {
+      "epoch": 0.02383,
+      "grad_norm": 0.5676379045904221,
+      "learning_rate": 0.003,
+      "loss": 4.1407,
+      "step": 2383
+    },
+    {
+      "epoch": 0.02384,
+      "grad_norm": 0.5238096521489247,
+      "learning_rate": 0.003,
+      "loss": 4.1258,
+      "step": 2384
+    },
+    {
+      "epoch": 0.02385,
+      "grad_norm": 0.5152751390365298,
+      "learning_rate": 0.003,
+      "loss": 4.1501,
+      "step": 2385
+    },
+    {
+      "epoch": 0.02386,
+      "grad_norm": 0.6091514932256747,
+      "learning_rate": 0.003,
+      "loss": 4.13,
+      "step": 2386
+    },
+    {
+      "epoch": 0.02387,
+      "grad_norm": 0.7793599922586826,
+      "learning_rate": 0.003,
+      "loss": 4.1323,
+      "step": 2387
+    },
+    {
+      "epoch": 0.02388,
+      "grad_norm": 0.9352617435026656,
+      "learning_rate": 0.003,
+      "loss": 4.1923,
+      "step": 2388
+    },
+    {
+      "epoch": 0.02389,
+      "grad_norm": 0.9609729763435508,
+      "learning_rate": 0.003,
+      "loss": 4.1586,
+      "step": 2389
+    },
+    {
+      "epoch": 0.0239,
+      "grad_norm": 0.8094018370976185,
+      "learning_rate": 0.003,
+      "loss": 4.1475,
+      "step": 2390
+    },
+    {
+      "epoch": 0.02391,
+      "grad_norm": 0.8506283143692317,
+      "learning_rate": 0.003,
+      "loss": 4.1734,
+      "step": 2391
+    },
+    {
+      "epoch": 0.02392,
+      "grad_norm": 0.9050332115497783,
+      "learning_rate": 0.003,
+      "loss": 4.1803,
+      "step": 2392
+    },
+    {
+      "epoch": 0.02393,
+      "grad_norm": 0.9634890728744777,
+      "learning_rate": 0.003,
+      "loss": 4.1413,
+      "step": 2393
+    },
+    {
+      "epoch": 0.02394,
+      "grad_norm": 0.8882373157340075,
+      "learning_rate": 0.003,
+      "loss": 4.1569,
+      "step": 2394
+    },
+    {
+      "epoch": 0.02395,
+      "grad_norm": 0.7565332153535935,
+      "learning_rate": 0.003,
+      "loss": 4.1675,
+      "step": 2395
+    },
+    {
+      "epoch": 0.02396,
+      "grad_norm": 0.8603458435447788,
+      "learning_rate": 0.003,
+      "loss": 4.1849,
+      "step": 2396
+    },
+    {
+      "epoch": 0.02397,
+      "grad_norm": 0.8261263361309694,
+      "learning_rate": 0.003,
+      "loss": 4.169,
+      "step": 2397
+    },
+    {
+      "epoch": 0.02398,
+      "grad_norm": 0.7794743587492478,
+      "learning_rate": 0.003,
+      "loss": 4.1744,
+      "step": 2398
+    },
+    {
+      "epoch": 0.02399,
+      "grad_norm": 0.8140982417839558,
+      "learning_rate": 0.003,
+      "loss": 4.1533,
+      "step": 2399
+    },
+    {
+      "epoch": 0.024,
+      "grad_norm": 0.9091203275980858,
+      "learning_rate": 0.003,
+      "loss": 4.1719,
+      "step": 2400
+    },
+    {
+      "epoch": 0.02401,
+      "grad_norm": 0.9082248326953591,
+      "learning_rate": 0.003,
+      "loss": 4.1471,
+      "step": 2401
+    },
+    {
+      "epoch": 0.02402,
+      "grad_norm": 0.8996640369589112,
+      "learning_rate": 0.003,
+      "loss": 4.1798,
+      "step": 2402
+    },
+    {
+      "epoch": 0.02403,
+      "grad_norm": 0.9493494458526733,
+      "learning_rate": 0.003,
+      "loss": 4.1514,
+      "step": 2403
+    },
+    {
+      "epoch": 0.02404,
+      "grad_norm": 0.9827368493949332,
+      "learning_rate": 0.003,
+      "loss": 4.1965,
+      "step": 2404
+    },
+    {
+      "epoch": 0.02405,
+      "grad_norm": 0.9551905621174913,
+      "learning_rate": 0.003,
+      "loss": 4.2021,
+      "step": 2405
+    },
+    {
+      "epoch": 0.02406,
+      "grad_norm": 0.9364752365255031,
+      "learning_rate": 0.003,
+      "loss": 4.1854,
+      "step": 2406
+    },
+    {
+      "epoch": 0.02407,
+      "grad_norm": 0.9463612550147267,
+      "learning_rate": 0.003,
+      "loss": 4.1641,
+      "step": 2407
+    },
+    {
+      "epoch": 0.02408,
+      "grad_norm": 0.8719994888631714,
+      "learning_rate": 0.003,
+      "loss": 4.1732,
+      "step": 2408
+    },
+    {
+      "epoch": 0.02409,
+      "grad_norm": 0.864310092393717,
+      "learning_rate": 0.003,
+      "loss": 4.1757,
+      "step": 2409
+    },
+    {
+      "epoch": 0.0241,
+      "grad_norm": 0.8973500607718927,
+      "learning_rate": 0.003,
+      "loss": 4.1883,
+      "step": 2410
+    },
+    {
+      "epoch": 0.02411,
+      "grad_norm": 1.1693499034787715,
+      "learning_rate": 0.003,
+      "loss": 4.1693,
+      "step": 2411
+    },
+    {
+      "epoch": 0.02412,
+      "grad_norm": 0.8431604098452435,
+      "learning_rate": 0.003,
+      "loss": 4.164,
+      "step": 2412
+    },
+    {
+      "epoch": 0.02413,
+      "grad_norm": 0.7767055362680262,
+      "learning_rate": 0.003,
+      "loss": 4.1597,
+      "step": 2413
+    },
+    {
+      "epoch": 0.02414,
+      "grad_norm": 0.8330306566206577,
+      "learning_rate": 0.003,
+      "loss": 4.1744,
+      "step": 2414
+    },
+    {
+      "epoch": 0.02415,
+      "grad_norm": 0.8217708926095696,
+      "learning_rate": 0.003,
+      "loss": 4.1765,
+      "step": 2415
+    },
+    {
+      "epoch": 0.02416,
+      "grad_norm": 0.8597994699370953,
+      "learning_rate": 0.003,
+      "loss": 4.1589,
+      "step": 2416
+    },
+    {
+      "epoch": 0.02417,
+      "grad_norm": 0.8398506547738631,
+      "learning_rate": 0.003,
+      "loss": 4.1829,
+      "step": 2417
+    },
+    {
+      "epoch": 0.02418,
+      "grad_norm": 0.7704254084685477,
+      "learning_rate": 0.003,
+      "loss": 4.1714,
+      "step": 2418
+    },
+    {
+      "epoch": 0.02419,
+      "grad_norm": 0.752617109928811,
+      "learning_rate": 0.003,
+      "loss": 4.1691,
+      "step": 2419
+    },
+    {
+      "epoch": 0.0242,
+      "grad_norm": 0.681614467806083,
+      "learning_rate": 0.003,
+      "loss": 4.1781,
+      "step": 2420
+    },
+    {
+      "epoch": 0.02421,
+      "grad_norm": 0.57806703718961,
+      "learning_rate": 0.003,
+      "loss": 4.1374,
+      "step": 2421
+    },
+    {
+      "epoch": 0.02422,
+      "grad_norm": 0.5368829917602504,
+      "learning_rate": 0.003,
+      "loss": 4.1904,
+      "step": 2422
+    },
+    {
+      "epoch": 0.02423,
+      "grad_norm": 0.49926017966447833,
+      "learning_rate": 0.003,
+      "loss": 4.1767,
+      "step": 2423
+    },
+    {
+      "epoch": 0.02424,
+      "grad_norm": 0.44538511553678933,
+      "learning_rate": 0.003,
+      "loss": 4.1709,
+      "step": 2424
+    },
+    {
+      "epoch": 0.02425,
+      "grad_norm": 0.4455623217366007,
+      "learning_rate": 0.003,
+      "loss": 4.1546,
+      "step": 2425
+    },
+    {
+      "epoch": 0.02426,
+      "grad_norm": 0.45034256565793024,
+      "learning_rate": 0.003,
+      "loss": 4.1608,
+      "step": 2426
+    },
+    {
+      "epoch": 0.02427,
+      "grad_norm": 0.4277494354911392,
+      "learning_rate": 0.003,
+      "loss": 4.1831,
+      "step": 2427
+    },
+    {
+      "epoch": 0.02428,
+      "grad_norm": 0.49536930851083055,
+      "learning_rate": 0.003,
+      "loss": 4.1561,
+      "step": 2428
+    },
+    {
+      "epoch": 0.02429,
+      "grad_norm": 0.5349623201813287,
+      "learning_rate": 0.003,
+      "loss": 4.1417,
+      "step": 2429
+    },
+    {
+      "epoch": 0.0243,
+      "grad_norm": 0.6385700511917229,
+      "learning_rate": 0.003,
+      "loss": 4.1681,
+      "step": 2430
+    },
+    {
+      "epoch": 0.02431,
+      "grad_norm": 0.7962007873904552,
+      "learning_rate": 0.003,
+      "loss": 4.1506,
+      "step": 2431
+    },
+    {
+      "epoch": 0.02432,
+      "grad_norm": 0.7552982964141086,
+      "learning_rate": 0.003,
+      "loss": 4.1735,
+      "step": 2432
+    },
+    {
+      "epoch": 0.02433,
+      "grad_norm": 0.5576758209292167,
+      "learning_rate": 0.003,
+      "loss": 4.179,
+      "step": 2433
+    },
+    {
+      "epoch": 0.02434,
+      "grad_norm": 0.5731742455027719,
+      "learning_rate": 0.003,
+      "loss": 4.1457,
+      "step": 2434
+    },
+    {
+      "epoch": 0.02435,
+      "grad_norm": 0.6782357026569522,
+      "learning_rate": 0.003,
+      "loss": 4.1482,
+      "step": 2435
+    },
+    {
+      "epoch": 0.02436,
+      "grad_norm": 0.8071855900106416,
+      "learning_rate": 0.003,
+      "loss": 4.1587,
+      "step": 2436
+    },
+    {
+      "epoch": 0.02437,
+      "grad_norm": 0.9265059909484438,
+      "learning_rate": 0.003,
+      "loss": 4.1467,
+      "step": 2437
+    },
+    {
+      "epoch": 0.02438,
+      "grad_norm": 0.8793214152171194,
+      "learning_rate": 0.003,
+      "loss": 4.1644,
+      "step": 2438
+    },
+    {
+      "epoch": 0.02439,
+      "grad_norm": 0.8832749976596599,
+      "learning_rate": 0.003,
+      "loss": 4.1384,
+      "step": 2439
+    },
+    {
+      "epoch": 0.0244,
+      "grad_norm": 0.9746984844718865,
+      "learning_rate": 0.003,
+      "loss": 4.1934,
+      "step": 2440
+    },
+    {
+      "epoch": 0.02441,
+      "grad_norm": 1.0708150303834312,
+      "learning_rate": 0.003,
+      "loss": 4.1618,
+      "step": 2441
+    },
+    {
+      "epoch": 0.02442,
+      "grad_norm": 1.040411975678819,
+      "learning_rate": 0.003,
+      "loss": 4.2014,
+      "step": 2442
+    },
+    {
+      "epoch": 0.02443,
+      "grad_norm": 1.157601705274513,
+      "learning_rate": 0.003,
+      "loss": 4.1413,
+      "step": 2443
+    },
+    {
+      "epoch": 0.02444,
+      "grad_norm": 0.8998517717034221,
+      "learning_rate": 0.003,
+      "loss": 4.1633,
+      "step": 2444
+    },
+    {
+      "epoch": 0.02445,
+      "grad_norm": 0.7914422604882547,
+      "learning_rate": 0.003,
+      "loss": 4.1691,
+      "step": 2445
+    },
+    {
+      "epoch": 0.02446,
+      "grad_norm": 0.765570533672039,
+      "learning_rate": 0.003,
+      "loss": 4.168,
+      "step": 2446
+    },
+    {
+      "epoch": 0.02447,
+      "grad_norm": 0.7780408804262848,
+      "learning_rate": 0.003,
+      "loss": 4.1745,
+      "step": 2447
+    },
+    {
+      "epoch": 0.02448,
+      "grad_norm": 0.8355663969723807,
+      "learning_rate": 0.003,
+      "loss": 4.1748,
+      "step": 2448
+    },
+    {
+      "epoch": 0.02449,
+      "grad_norm": 0.8911004412381984,
+      "learning_rate": 0.003,
+      "loss": 4.173,
+      "step": 2449
+    },
+    {
+      "epoch": 0.0245,
+      "grad_norm": 0.8721571131136453,
+      "learning_rate": 0.003,
+      "loss": 4.1764,
+      "step": 2450
+    },
+    {
+      "epoch": 0.02451,
+      "grad_norm": 0.8424855685228627,
+      "learning_rate": 0.003,
+      "loss": 4.1549,
+      "step": 2451
+    },
+    {
+      "epoch": 0.02452,
+      "grad_norm": 0.71343486564028,
+      "learning_rate": 0.003,
+      "loss": 4.1589,
+      "step": 2452
+    },
+    {
+      "epoch": 0.02453,
+      "grad_norm": 0.830794139281049,
+      "learning_rate": 0.003,
+      "loss": 4.1386,
+      "step": 2453
+    },
+    {
+      "epoch": 0.02454,
+      "grad_norm": 0.8371964018463887,
+      "learning_rate": 0.003,
+      "loss": 4.1444,
+      "step": 2454
+    },
+    {
+      "epoch": 0.02455,
+      "grad_norm": 0.828173261775671,
+      "learning_rate": 0.003,
+      "loss": 4.1555,
+      "step": 2455
+    },
+    {
+      "epoch": 0.02456,
+      "grad_norm": 0.8111209228027585,
+      "learning_rate": 0.003,
+      "loss": 4.1205,
+      "step": 2456
+    },
+    {
+      "epoch": 0.02457,
+      "grad_norm": 0.7593406743031937,
+      "learning_rate": 0.003,
+      "loss": 4.175,
+      "step": 2457
+    },
+    {
+      "epoch": 0.02458,
+      "grad_norm": 0.6152494042836864,
+      "learning_rate": 0.003,
+      "loss": 4.1392,
+      "step": 2458
+    },
+    {
+      "epoch": 0.02459,
+      "grad_norm": 0.6726714704776762,
+      "learning_rate": 0.003,
+      "loss": 4.1704,
+      "step": 2459
+    },
+    {
+      "epoch": 0.0246,
+      "grad_norm": 0.6849708403280691,
+      "learning_rate": 0.003,
+      "loss": 4.1742,
+      "step": 2460
+    },
+    {
+      "epoch": 0.02461,
+      "grad_norm": 0.6926478544751591,
+      "learning_rate": 0.003,
+      "loss": 4.1489,
+      "step": 2461
+    },
+    {
+      "epoch": 0.02462,
+      "grad_norm": 0.6546290518877314,
+      "learning_rate": 0.003,
+      "loss": 4.1745,
+      "step": 2462
+    },
+    {
+      "epoch": 0.02463,
+      "grad_norm": 0.6160770029093054,
+      "learning_rate": 0.003,
+      "loss": 4.1691,
+      "step": 2463
+    },
+    {
+      "epoch": 0.02464,
+      "grad_norm": 0.5990408684539491,
+      "learning_rate": 0.003,
+      "loss": 4.1487,
+      "step": 2464
+    },
+    {
+      "epoch": 0.02465,
+      "grad_norm": 0.5034148559800348,
+      "learning_rate": 0.003,
+      "loss": 4.1619,
+      "step": 2465
+    },
+    {
+      "epoch": 0.02466,
+      "grad_norm": 0.4860347114545619,
+      "learning_rate": 0.003,
+      "loss": 4.1408,
+      "step": 2466
+    },
+    {
+      "epoch": 0.02467,
+      "grad_norm": 0.4781136004723512,
+      "learning_rate": 0.003,
+      "loss": 4.1183,
+      "step": 2467
+    },
+    {
+      "epoch": 0.02468,
+      "grad_norm": 0.4265902063645912,
+      "learning_rate": 0.003,
+      "loss": 4.1584,
+      "step": 2468
+    },
+    {
+      "epoch": 0.02469,
+      "grad_norm": 0.5508562887056804,
+      "learning_rate": 0.003,
+      "loss": 4.1409,
+      "step": 2469
+    },
+    {
+      "epoch": 0.0247,
+      "grad_norm": 0.8249536754461567,
+      "learning_rate": 0.003,
+      "loss": 4.1452,
+      "step": 2470
+    },
+    {
+      "epoch": 0.02471,
+      "grad_norm": 1.4359640347930054,
+      "learning_rate": 0.003,
+      "loss": 4.1561,
+      "step": 2471
+    },
+    {
+      "epoch": 0.02472,
+      "grad_norm": 0.6769644869683711,
+      "learning_rate": 0.003,
+      "loss": 4.1433,
+      "step": 2472
+    },
+    {
+      "epoch": 0.02473,
+      "grad_norm": 0.7658209871323785,
+      "learning_rate": 0.003,
+      "loss": 4.1289,
+      "step": 2473
+    },
+    {
+      "epoch": 0.02474,
+      "grad_norm": 0.9511612267134097,
+      "learning_rate": 0.003,
+      "loss": 4.1669,
+      "step": 2474
+    },
+    {
+      "epoch": 0.02475,
+      "grad_norm": 0.8892227903889872,
+      "learning_rate": 0.003,
+      "loss": 4.1391,
+      "step": 2475
+    },
+    {
+      "epoch": 0.02476,
+      "grad_norm": 0.9920143400330694,
+      "learning_rate": 0.003,
+      "loss": 4.152,
+      "step": 2476
+    },
+    {
+      "epoch": 0.02477,
+      "grad_norm": 0.9006606413119249,
+      "learning_rate": 0.003,
+      "loss": 4.1748,
+      "step": 2477
+    },
+    {
+      "epoch": 0.02478,
+      "grad_norm": 0.8088690703733978,
+      "learning_rate": 0.003,
+      "loss": 4.1471,
+      "step": 2478
+    },
+    {
+      "epoch": 0.02479,
+      "grad_norm": 0.8182856999501472,
+      "learning_rate": 0.003,
+      "loss": 4.1366,
+      "step": 2479
+    },
+    {
+      "epoch": 0.0248,
+      "grad_norm": 0.7355042387960451,
+      "learning_rate": 0.003,
+      "loss": 4.178,
+      "step": 2480
+    },
+    {
+      "epoch": 0.02481,
+      "grad_norm": 0.6962248057182712,
+      "learning_rate": 0.003,
+      "loss": 4.1578,
+      "step": 2481
+    },
+    {
+      "epoch": 0.02482,
+      "grad_norm": 0.6352228705173556,
+      "learning_rate": 0.003,
+      "loss": 4.1666,
+      "step": 2482
+    },
+    {
+      "epoch": 0.02483,
+      "grad_norm": 0.6906094612236757,
+      "learning_rate": 0.003,
+      "loss": 4.1183,
+      "step": 2483
+    },
+    {
+      "epoch": 0.02484,
+      "grad_norm": 0.66658401463766,
+      "learning_rate": 0.003,
+      "loss": 4.1474,
+      "step": 2484
+    },
+    {
+      "epoch": 0.02485,
+      "grad_norm": 0.6596750859147653,
+      "learning_rate": 0.003,
+      "loss": 4.1705,
+      "step": 2485
+    },
+    {
+      "epoch": 0.02486,
+      "grad_norm": 0.7066148867713012,
+      "learning_rate": 0.003,
+      "loss": 4.1929,
+      "step": 2486
+    },
+    {
+      "epoch": 0.02487,
+      "grad_norm": 0.754767630208959,
+      "learning_rate": 0.003,
+      "loss": 4.1498,
+      "step": 2487
+    },
+    {
+      "epoch": 0.02488,
+      "grad_norm": 0.7178651369815516,
+      "learning_rate": 0.003,
+      "loss": 4.1276,
+      "step": 2488
+    },
+    {
+      "epoch": 0.02489,
+      "grad_norm": 0.7388633013067267,
+      "learning_rate": 0.003,
+      "loss": 4.1159,
+      "step": 2489
+    },
+    {
+      "epoch": 0.0249,
+      "grad_norm": 0.7802405314573639,
+      "learning_rate": 0.003,
+      "loss": 4.1239,
+      "step": 2490
+    },
+    {
+      "epoch": 0.02491,
+      "grad_norm": 0.6722919878141813,
+      "learning_rate": 0.003,
+      "loss": 4.147,
+      "step": 2491
+    },
+    {
+      "epoch": 0.02492,
+      "grad_norm": 0.6231446299505073,
+      "learning_rate": 0.003,
+      "loss": 4.1546,
+      "step": 2492
+    },
+    {
+      "epoch": 0.02493,
+      "grad_norm": 0.6055678621905288,
+      "learning_rate": 0.003,
+      "loss": 4.1252,
+      "step": 2493
+    },
+    {
+      "epoch": 0.02494,
+      "grad_norm": 0.6270135330055409,
+      "learning_rate": 0.003,
+      "loss": 4.1374,
+      "step": 2494
+    },
+    {
+      "epoch": 0.02495,
+      "grad_norm": 0.6525517462302536,
+      "learning_rate": 0.003,
+      "loss": 4.1487,
+      "step": 2495
+    },
+    {
+      "epoch": 0.02496,
+      "grad_norm": 0.7329319033542416,
+      "learning_rate": 0.003,
+      "loss": 4.14,
+      "step": 2496
+    },
+    {
+      "epoch": 0.02497,
+      "grad_norm": 0.9619996867887689,
+      "learning_rate": 0.003,
+      "loss": 4.1515,
+      "step": 2497
+    },
+    {
+      "epoch": 0.02498,
+      "grad_norm": 1.141814403141371,
+      "learning_rate": 0.003,
+      "loss": 4.1545,
+      "step": 2498
+    },
+    {
+      "epoch": 0.02499,
+      "grad_norm": 0.9016540268347464,
+      "learning_rate": 0.003,
+      "loss": 4.1556,
+      "step": 2499
+    },
+    {
+      "epoch": 0.025,
+      "grad_norm": 0.9065801171881926,
+      "learning_rate": 0.003,
+      "loss": 4.1446,
+      "step": 2500
+    },
+    {
+      "epoch": 0.02501,
+      "grad_norm": 0.8844338776736929,
+      "learning_rate": 0.003,
+      "loss": 4.1765,
+      "step": 2501
+    },
+    {
+      "epoch": 0.02502,
+      "grad_norm": 0.8046602370535848,
+      "learning_rate": 0.003,
+      "loss": 4.1579,
+      "step": 2502
+    },
+    {
+      "epoch": 0.02503,
+      "grad_norm": 0.7039592913476108,
+      "learning_rate": 0.003,
+      "loss": 4.1643,
+      "step": 2503
+    },
+    {
+      "epoch": 0.02504,
+      "grad_norm": 0.7419132370317111,
+      "learning_rate": 0.003,
+      "loss": 4.1429,
+      "step": 2504
+    },
+    {
+      "epoch": 0.02505,
+      "grad_norm": 0.7606498464541366,
+      "learning_rate": 0.003,
+      "loss": 4.1455,
+      "step": 2505
+    },
+    {
+      "epoch": 0.02506,
+      "grad_norm": 0.769560412914919,
+      "learning_rate": 0.003,
+      "loss": 4.1579,
+      "step": 2506
+    },
+    {
+      "epoch": 0.02507,
+      "grad_norm": 0.8361991263151639,
+      "learning_rate": 0.003,
+      "loss": 4.1464,
+      "step": 2507
+    },
+    {
+      "epoch": 0.02508,
+      "grad_norm": 1.0314436698214402,
+      "learning_rate": 0.003,
+      "loss": 4.1552,
+      "step": 2508
+    },
+    {
+      "epoch": 0.02509,
+      "grad_norm": 1.0808307469983176,
+      "learning_rate": 0.003,
+      "loss": 4.1638,
+      "step": 2509
+    },
+    {
+      "epoch": 0.0251,
+      "grad_norm": 0.9756326578065097,
+      "learning_rate": 0.003,
+      "loss": 4.1772,
+      "step": 2510
+    },
+    {
+      "epoch": 0.02511,
+      "grad_norm": 0.9782444756269447,
+      "learning_rate": 0.003,
+      "loss": 4.1853,
+      "step": 2511
+    },
+    {
+      "epoch": 0.02512,
+      "grad_norm": 0.9679666828238646,
+      "learning_rate": 0.003,
+      "loss": 4.1672,
+      "step": 2512
+    },
+    {
+      "epoch": 0.02513,
+      "grad_norm": 0.9141144321737233,
+      "learning_rate": 0.003,
+      "loss": 4.1647,
+      "step": 2513
+    },
+    {
+      "epoch": 0.02514,
+      "grad_norm": 0.870311596191751,
+      "learning_rate": 0.003,
+      "loss": 4.1521,
+      "step": 2514
+    },
+    {
+      "epoch": 0.02515,
+      "grad_norm": 0.718099567068549,
+      "learning_rate": 0.003,
+      "loss": 4.134,
+      "step": 2515
+    },
+    {
+      "epoch": 0.02516,
+      "grad_norm": 0.6485758792814954,
+      "learning_rate": 0.003,
+      "loss": 4.153,
+      "step": 2516
+    },
+    {
+      "epoch": 0.02517,
+      "grad_norm": 0.6631125888262853,
+      "learning_rate": 0.003,
+      "loss": 4.1528,
+      "step": 2517
+    },
+    {
+      "epoch": 0.02518,
+      "grad_norm": 0.62508603256109,
+      "learning_rate": 0.003,
+      "loss": 4.1629,
+      "step": 2518
+    },
+    {
+      "epoch": 0.02519,
+      "grad_norm": 0.6533648513332591,
+      "learning_rate": 0.003,
+      "loss": 4.1392,
+      "step": 2519
+    },
+    {
+      "epoch": 0.0252,
+      "grad_norm": 0.8730883164618287,
+      "learning_rate": 0.003,
+      "loss": 4.1603,
+      "step": 2520
+    },
+    {
+      "epoch": 0.02521,
+      "grad_norm": 1.1508715959612579,
+      "learning_rate": 0.003,
+      "loss": 4.1617,
+      "step": 2521
+    },
+    {
+      "epoch": 0.02522,
+      "grad_norm": 0.8851560060677419,
+      "learning_rate": 0.003,
+      "loss": 4.1564,
+      "step": 2522
+    },
+    {
+      "epoch": 0.02523,
+      "grad_norm": 0.7609503224943213,
+      "learning_rate": 0.003,
+      "loss": 4.1544,
+      "step": 2523
+    },
+    {
+      "epoch": 0.02524,
+      "grad_norm": 0.7874722612084045,
+      "learning_rate": 0.003,
+      "loss": 4.1632,
+      "step": 2524
+    },
+    {
+      "epoch": 0.02525,
+      "grad_norm": 0.9497267581795765,
+      "learning_rate": 0.003,
+      "loss": 4.1834,
+      "step": 2525
+    },
+    {
+      "epoch": 0.02526,
+      "grad_norm": 1.1487960136042998,
+      "learning_rate": 0.003,
+      "loss": 4.1381,
+      "step": 2526
+    },
+    {
+      "epoch": 0.02527,
+      "grad_norm": 0.749631180786954,
+      "learning_rate": 0.003,
+      "loss": 4.1602,
+      "step": 2527
+    },
+    {
+      "epoch": 0.02528,
+      "grad_norm": 0.6717432753530606,
+      "learning_rate": 0.003,
+      "loss": 4.1816,
+      "step": 2528
+    },
+    {
+      "epoch": 0.02529,
+      "grad_norm": 0.6421573788772453,
+      "learning_rate": 0.003,
+      "loss": 4.1481,
+      "step": 2529
+    },
+    {
+      "epoch": 0.0253,
+      "grad_norm": 0.7486612963376212,
+      "learning_rate": 0.003,
+      "loss": 4.1689,
+      "step": 2530
+    },
+    {
+      "epoch": 0.02531,
+      "grad_norm": 0.7949983435688287,
+      "learning_rate": 0.003,
+      "loss": 4.171,
+      "step": 2531
+    },
+    {
+      "epoch": 0.02532,
+      "grad_norm": 0.7618621411023159,
+      "learning_rate": 0.003,
+      "loss": 4.1364,
+      "step": 2532
+    },
+    {
+      "epoch": 0.02533,
+      "grad_norm": 0.8123389715874717,
+      "learning_rate": 0.003,
+      "loss": 4.1773,
+      "step": 2533
+    },
+    {
+      "epoch": 0.02534,
+      "grad_norm": 0.808321322590649,
+      "learning_rate": 0.003,
+      "loss": 4.1573,
+      "step": 2534
+    },
+    {
+      "epoch": 0.02535,
+      "grad_norm": 0.7051467031412186,
+      "learning_rate": 0.003,
+      "loss": 4.1477,
+      "step": 2535
+    },
+    {
+      "epoch": 0.02536,
+      "grad_norm": 0.6093069610094953,
+      "learning_rate": 0.003,
+      "loss": 4.142,
+      "step": 2536
+    },
+    {
+      "epoch": 0.02537,
+      "grad_norm": 0.5843395517845005,
+      "learning_rate": 0.003,
+      "loss": 4.1344,
+      "step": 2537
+    },
+    {
+      "epoch": 0.02538,
+      "grad_norm": 0.5674485238862751,
+      "learning_rate": 0.003,
+      "loss": 4.1098,
+      "step": 2538
+    },
+    {
+      "epoch": 0.02539,
+      "grad_norm": 0.5652766007864589,
+      "learning_rate": 0.003,
+      "loss": 4.1606,
+      "step": 2539
+    },
+    {
+      "epoch": 0.0254,
+      "grad_norm": 0.6570254628858063,
+      "learning_rate": 0.003,
+      "loss": 4.1514,
+      "step": 2540
+    },
+    {
+      "epoch": 0.02541,
+      "grad_norm": 0.8206242142708078,
+      "learning_rate": 0.003,
+      "loss": 4.1443,
+      "step": 2541
+    },
+    {
+      "epoch": 0.02542,
+      "grad_norm": 0.9677168967962686,
+      "learning_rate": 0.003,
+      "loss": 4.134,
+      "step": 2542
+    },
+    {
+      "epoch": 0.02543,
+      "grad_norm": 1.024805521305044,
+      "learning_rate": 0.003,
+      "loss": 4.1646,
+      "step": 2543
+    },
+    {
+      "epoch": 0.02544,
+      "grad_norm": 0.7591855178067561,
+      "learning_rate": 0.003,
+      "loss": 4.148,
+      "step": 2544
+    },
+    {
+      "epoch": 0.02545,
+      "grad_norm": 0.6449474624868338,
+      "learning_rate": 0.003,
+      "loss": 4.1255,
+      "step": 2545
+    },
+    {
+      "epoch": 0.02546,
+      "grad_norm": 0.6980586139707194,
+      "learning_rate": 0.003,
+      "loss": 4.136,
+      "step": 2546
+    },
+    {
+      "epoch": 0.02547,
+      "grad_norm": 0.7283106303300475,
+      "learning_rate": 0.003,
+      "loss": 4.1754,
+      "step": 2547
+    },
+    {
+      "epoch": 0.02548,
+      "grad_norm": 0.6850467395404413,
+      "learning_rate": 0.003,
+      "loss": 4.1691,
+      "step": 2548
+    },
+    {
+      "epoch": 0.02549,
+      "grad_norm": 0.7158743790784491,
+      "learning_rate": 0.003,
+      "loss": 4.1632,
+      "step": 2549
+    },
+    {
+      "epoch": 0.0255,
+      "grad_norm": 0.799200990441851,
+      "learning_rate": 0.003,
+      "loss": 4.1303,
+      "step": 2550
+    },
+    {
+      "epoch": 0.02551,
+      "grad_norm": 0.8852033835267075,
+      "learning_rate": 0.003,
+      "loss": 4.165,
+      "step": 2551
+    },
+    {
+      "epoch": 0.02552,
+      "grad_norm": 0.9381299460340671,
+      "learning_rate": 0.003,
+      "loss": 4.1394,
+      "step": 2552
+    },
+    {
+      "epoch": 0.02553,
+      "grad_norm": 0.8787078285398282,
+      "learning_rate": 0.003,
+      "loss": 4.2019,
+      "step": 2553
+    },
+    {
+      "epoch": 0.02554,
+      "grad_norm": 1.0064349811006572,
+      "learning_rate": 0.003,
+      "loss": 4.1601,
+      "step": 2554
+    },
+    {
+      "epoch": 0.02555,
+      "grad_norm": 0.8623285229523936,
+      "learning_rate": 0.003,
+      "loss": 4.1454,
+      "step": 2555
+    },
+    {
+      "epoch": 0.02556,
+      "grad_norm": 0.779791532210884,
+      "learning_rate": 0.003,
+      "loss": 4.154,
+      "step": 2556
+    },
+    {
+      "epoch": 0.02557,
+      "grad_norm": 0.8041071030856266,
+      "learning_rate": 0.003,
+      "loss": 4.1561,
+      "step": 2557
+    },
+    {
+      "epoch": 0.02558,
+      "grad_norm": 0.9046731898466632,
+      "learning_rate": 0.003,
+      "loss": 4.1797,
+      "step": 2558
+    },
+    {
+      "epoch": 0.02559,
+      "grad_norm": 0.9231060963177342,
+      "learning_rate": 0.003,
+      "loss": 4.1753,
+      "step": 2559
+    },
+    {
+      "epoch": 0.0256,
+      "grad_norm": 0.7942918059321874,
+      "learning_rate": 0.003,
+      "loss": 4.1509,
+      "step": 2560
+    },
+    {
+      "epoch": 0.02561,
+      "grad_norm": 0.7182994269745618,
+      "learning_rate": 0.003,
+      "loss": 4.1458,
+      "step": 2561
+    },
+    {
+      "epoch": 0.02562,
+      "grad_norm": 0.7377092426971169,
+      "learning_rate": 0.003,
+      "loss": 4.1635,
+      "step": 2562
+    },
+    {
+      "epoch": 0.02563,
+      "grad_norm": 0.8008807069379792,
+      "learning_rate": 0.003,
+      "loss": 4.1444,
+      "step": 2563
+    },
+    {
+      "epoch": 0.02564,
+      "grad_norm": 0.9654334894945076,
+      "learning_rate": 0.003,
+      "loss": 4.1721,
+      "step": 2564
+    },
+    {
+      "epoch": 0.02565,
+      "grad_norm": 1.1270177489997797,
+      "learning_rate": 0.003,
+      "loss": 4.1607,
+      "step": 2565
+    },
+    {
+      "epoch": 0.02566,
+      "grad_norm": 0.7790702826784099,
+      "learning_rate": 0.003,
+      "loss": 4.1704,
+      "step": 2566
+    },
+    {
+      "epoch": 0.02567,
+      "grad_norm": 0.7253981064969616,
+      "learning_rate": 0.003,
+      "loss": 4.1684,
+      "step": 2567
+    },
+    {
+      "epoch": 0.02568,
+      "grad_norm": 0.7766989633341348,
+      "learning_rate": 0.003,
+      "loss": 4.1634,
+      "step": 2568
+    },
+    {
+      "epoch": 0.02569,
+      "grad_norm": 0.6753943323614341,
+      "learning_rate": 0.003,
+      "loss": 4.1406,
+      "step": 2569
+    },
+    {
+      "epoch": 0.0257,
+      "grad_norm": 0.6053036326649146,
+      "learning_rate": 0.003,
+      "loss": 4.142,
+      "step": 2570
+    },
+    {
+      "epoch": 0.02571,
+      "grad_norm": 0.5412545714840278,
+      "learning_rate": 0.003,
+      "loss": 4.1611,
+      "step": 2571
+    },
+    {
+      "epoch": 0.02572,
+      "grad_norm": 0.618814983550634,
+      "learning_rate": 0.003,
+      "loss": 4.1399,
+      "step": 2572
+    },
+    {
+      "epoch": 0.02573,
+      "grad_norm": 0.6647990779688017,
+      "learning_rate": 0.003,
+      "loss": 4.1458,
+      "step": 2573
+    },
+    {
+      "epoch": 0.02574,
+      "grad_norm": 0.7052848934500006,
+      "learning_rate": 0.003,
+      "loss": 4.1737,
+      "step": 2574
+    },
+    {
+      "epoch": 0.02575,
+      "grad_norm": 0.7610884337795838,
+      "learning_rate": 0.003,
+      "loss": 4.1101,
+      "step": 2575
+    },
+    {
+      "epoch": 0.02576,
+      "grad_norm": 0.6692271813839591,
+      "learning_rate": 0.003,
+      "loss": 4.1159,
+      "step": 2576
+    },
+    {
+      "epoch": 0.02577,
+      "grad_norm": 0.5756800187751938,
+      "learning_rate": 0.003,
+      "loss": 4.1153,
+      "step": 2577
+    },
+    {
+      "epoch": 0.02578,
+      "grad_norm": 0.5445744087322336,
+      "learning_rate": 0.003,
+      "loss": 4.1401,
+      "step": 2578
+    },
+    {
+      "epoch": 0.02579,
+      "grad_norm": 0.5526066768211919,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 2579
+    },
+    {
+      "epoch": 0.0258,
+      "grad_norm": 0.555186498872379,
+      "learning_rate": 0.003,
+      "loss": 4.1418,
+      "step": 2580
+    },
+    {
+      "epoch": 0.02581,
+      "grad_norm": 0.590974511132741,
+      "learning_rate": 0.003,
+      "loss": 4.1622,
+      "step": 2581
+    },
+    {
+      "epoch": 0.02582,
+      "grad_norm": 0.6134714053872987,
+      "learning_rate": 0.003,
+      "loss": 4.157,
+      "step": 2582
+    },
+    {
+      "epoch": 0.02583,
+      "grad_norm": 0.6785456864342533,
+      "learning_rate": 0.003,
+      "loss": 4.1327,
+      "step": 2583
+    },
+    {
+      "epoch": 0.02584,
+      "grad_norm": 0.7692608608699008,
+      "learning_rate": 0.003,
+      "loss": 4.1476,
+      "step": 2584
+    },
+    {
+      "epoch": 0.02585,
+      "grad_norm": 0.8096555829199314,
+      "learning_rate": 0.003,
+      "loss": 4.1338,
+      "step": 2585
+    },
+    {
+      "epoch": 0.02586,
+      "grad_norm": 0.8850658157806096,
+      "learning_rate": 0.003,
+      "loss": 4.1563,
+      "step": 2586
+    },
+    {
+      "epoch": 0.02587,
+      "grad_norm": 1.0102418517134517,
+      "learning_rate": 0.003,
+      "loss": 4.1487,
+      "step": 2587
+    },
+    {
+      "epoch": 0.02588,
+      "grad_norm": 0.9275963803575394,
+      "learning_rate": 0.003,
+      "loss": 4.1505,
+      "step": 2588
+    },
+    {
+      "epoch": 0.02589,
+      "grad_norm": 0.9403619185225363,
+      "learning_rate": 0.003,
+      "loss": 4.1571,
+      "step": 2589
+    },
+    {
+      "epoch": 0.0259,
+      "grad_norm": 1.0078495428012795,
+      "learning_rate": 0.003,
+      "loss": 4.1693,
+      "step": 2590
+    },
+    {
+      "epoch": 0.02591,
+      "grad_norm": 1.2743604633788357,
+      "learning_rate": 0.003,
+      "loss": 4.1191,
+      "step": 2591
+    },
+    {
+      "epoch": 0.02592,
+      "grad_norm": 0.8231355726319857,
+      "learning_rate": 0.003,
+      "loss": 4.155,
+      "step": 2592
+    },
+    {
+      "epoch": 0.02593,
+      "grad_norm": 0.6890041984971179,
+      "learning_rate": 0.003,
+      "loss": 4.1511,
+      "step": 2593
+    },
+    {
+      "epoch": 0.02594,
+      "grad_norm": 0.7454180549344623,
+      "learning_rate": 0.003,
+      "loss": 4.1728,
+      "step": 2594
+    },
+    {
+      "epoch": 0.02595,
+      "grad_norm": 0.6902489836421277,
+      "learning_rate": 0.003,
+      "loss": 4.1203,
+      "step": 2595
+    },
+    {
+      "epoch": 0.02596,
+      "grad_norm": 0.8183952841129141,
+      "learning_rate": 0.003,
+      "loss": 4.1633,
+      "step": 2596
+    },
+    {
+      "epoch": 0.02597,
+      "grad_norm": 0.9211443305771235,
+      "learning_rate": 0.003,
+      "loss": 4.1359,
+      "step": 2597
+    },
+    {
+      "epoch": 0.02598,
+      "grad_norm": 1.0292390798108857,
+      "learning_rate": 0.003,
+      "loss": 4.1827,
+      "step": 2598
+    },
+    {
+      "epoch": 0.02599,
+      "grad_norm": 0.9373288426158787,
+      "learning_rate": 0.003,
+      "loss": 4.1706,
+      "step": 2599
+    },
+    {
+      "epoch": 0.026,
+      "grad_norm": 0.920433494603301,
+      "learning_rate": 0.003,
+      "loss": 4.1747,
+      "step": 2600
+    },
+    {
+      "epoch": 0.02601,
+      "grad_norm": 0.8451864765965912,
+      "learning_rate": 0.003,
+      "loss": 4.1776,
+      "step": 2601
+    },
+    {
+      "epoch": 0.02602,
+      "grad_norm": 0.7170319795923626,
+      "learning_rate": 0.003,
+      "loss": 4.1455,
+      "step": 2602
+    },
+    {
+      "epoch": 0.02603,
+      "grad_norm": 0.6966464803396121,
+      "learning_rate": 0.003,
+      "loss": 4.1401,
+      "step": 2603
+    },
+    {
+      "epoch": 0.02604,
+      "grad_norm": 0.7612566166038212,
+      "learning_rate": 0.003,
+      "loss": 4.1308,
+      "step": 2604
+    },
+    {
+      "epoch": 0.02605,
+      "grad_norm": 0.6464701620756803,
+      "learning_rate": 0.003,
+      "loss": 4.1437,
+      "step": 2605
+    },
+    {
+      "epoch": 0.02606,
+      "grad_norm": 0.5737007841123123,
+      "learning_rate": 0.003,
+      "loss": 4.162,
+      "step": 2606
+    },
+    {
+      "epoch": 0.02607,
+      "grad_norm": 0.6131766565865462,
+      "learning_rate": 0.003,
+      "loss": 4.1509,
+      "step": 2607
+    },
+    {
+      "epoch": 0.02608,
+      "grad_norm": 0.8086035003985383,
+      "learning_rate": 0.003,
+      "loss": 4.136,
+      "step": 2608
+    },
+    {
+      "epoch": 0.02609,
+      "grad_norm": 0.9817909383598199,
+      "learning_rate": 0.003,
+      "loss": 4.1914,
+      "step": 2609
+    },
+    {
+      "epoch": 0.0261,
+      "grad_norm": 1.0720508985841146,
+      "learning_rate": 0.003,
+      "loss": 4.1421,
+      "step": 2610
+    },
+    {
+      "epoch": 0.02611,
+      "grad_norm": 0.8659326470213682,
+      "learning_rate": 0.003,
+      "loss": 4.1249,
+      "step": 2611
+    },
+    {
+      "epoch": 0.02612,
+      "grad_norm": 0.8732525902579722,
+      "learning_rate": 0.003,
+      "loss": 4.1648,
+      "step": 2612
+    },
+    {
+      "epoch": 0.02613,
+      "grad_norm": 0.9874637099516153,
+      "learning_rate": 0.003,
+      "loss": 4.1605,
+      "step": 2613
+    },
+    {
+      "epoch": 0.02614,
+      "grad_norm": 0.906401203843813,
+      "learning_rate": 0.003,
+      "loss": 4.1404,
+      "step": 2614
+    },
+    {
+      "epoch": 0.02615,
+      "grad_norm": 0.6462260762478055,
+      "learning_rate": 0.003,
+      "loss": 4.1238,
+      "step": 2615
+    },
+    {
+      "epoch": 0.02616,
+      "grad_norm": 0.6006231478500065,
+      "learning_rate": 0.003,
+      "loss": 4.1352,
+      "step": 2616
+    },
+    {
+      "epoch": 0.02617,
+      "grad_norm": 0.6102511179138815,
+      "learning_rate": 0.003,
+      "loss": 4.1214,
+      "step": 2617
+    },
+    {
+      "epoch": 0.02618,
+      "grad_norm": 0.6619768894826954,
+      "learning_rate": 0.003,
+      "loss": 4.15,
+      "step": 2618
+    },
+    {
+      "epoch": 0.02619,
+      "grad_norm": 0.7040985692908992,
+      "learning_rate": 0.003,
+      "loss": 4.1627,
+      "step": 2619
+    },
+    {
+      "epoch": 0.0262,
+      "grad_norm": 0.7390370888569476,
+      "learning_rate": 0.003,
+      "loss": 4.1255,
+      "step": 2620
+    },
+    {
+      "epoch": 0.02621,
+      "grad_norm": 0.8595720375733896,
+      "learning_rate": 0.003,
+      "loss": 4.1547,
+      "step": 2621
+    },
+    {
+      "epoch": 0.02622,
+      "grad_norm": 0.8747740097574618,
+      "learning_rate": 0.003,
+      "loss": 4.1571,
+      "step": 2622
+    },
+    {
+      "epoch": 0.02623,
+      "grad_norm": 0.7865985620123697,
+      "learning_rate": 0.003,
+      "loss": 4.1577,
+      "step": 2623
+    },
+    {
+      "epoch": 0.02624,
+      "grad_norm": 0.721745866206175,
+      "learning_rate": 0.003,
+      "loss": 4.1924,
+      "step": 2624
+    },
+    {
+      "epoch": 0.02625,
+      "grad_norm": 0.6369082515266815,
+      "learning_rate": 0.003,
+      "loss": 4.1844,
+      "step": 2625
+    },
+    {
+      "epoch": 0.02626,
+      "grad_norm": 0.6738431853903379,
+      "learning_rate": 0.003,
+      "loss": 4.1204,
+      "step": 2626
+    },
+    {
+      "epoch": 0.02627,
+      "grad_norm": 0.8070717121127621,
+      "learning_rate": 0.003,
+      "loss": 4.1402,
+      "step": 2627
+    },
+    {
+      "epoch": 0.02628,
+      "grad_norm": 0.9052615625467546,
+      "learning_rate": 0.003,
+      "loss": 4.1733,
+      "step": 2628
+    },
+    {
+      "epoch": 0.02629,
+      "grad_norm": 0.9064578842099213,
+      "learning_rate": 0.003,
+      "loss": 4.112,
+      "step": 2629
+    },
+    {
+      "epoch": 0.0263,
+      "grad_norm": 0.9572710825858507,
+      "learning_rate": 0.003,
+      "loss": 4.1442,
+      "step": 2630
+    },
+    {
+      "epoch": 0.02631,
+      "grad_norm": 0.9098415499268271,
+      "learning_rate": 0.003,
+      "loss": 4.1702,
+      "step": 2631
+    },
+    {
+      "epoch": 0.02632,
+      "grad_norm": 0.957961730332363,
+      "learning_rate": 0.003,
+      "loss": 4.1333,
+      "step": 2632
+    },
+    {
+      "epoch": 0.02633,
+      "grad_norm": 0.8910675740871853,
+      "learning_rate": 0.003,
+      "loss": 4.1452,
+      "step": 2633
+    },
+    {
+      "epoch": 0.02634,
+      "grad_norm": 0.8894027249202754,
+      "learning_rate": 0.003,
+      "loss": 4.1657,
+      "step": 2634
+    },
+    {
+      "epoch": 0.02635,
+      "grad_norm": 1.0919965764145694,
+      "learning_rate": 0.003,
+      "loss": 4.171,
+      "step": 2635
+    },
+    {
+      "epoch": 0.02636,
+      "grad_norm": 1.101775206670289,
+      "learning_rate": 0.003,
+      "loss": 4.1603,
+      "step": 2636
+    },
+    {
+      "epoch": 0.02637,
+      "grad_norm": 0.9585912224073222,
+      "learning_rate": 0.003,
+      "loss": 4.1888,
+      "step": 2637
+    },
+    {
+      "epoch": 0.02638,
+      "grad_norm": 0.7865200036359159,
+      "learning_rate": 0.003,
+      "loss": 4.1464,
+      "step": 2638
+    },
+    {
+      "epoch": 0.02639,
+      "grad_norm": 0.7526947533075404,
+      "learning_rate": 0.003,
+      "loss": 4.1366,
+      "step": 2639
+    },
+    {
+      "epoch": 0.0264,
+      "grad_norm": 0.8605449721173474,
+      "learning_rate": 0.003,
+      "loss": 4.134,
+      "step": 2640
+    },
+    {
+      "epoch": 0.02641,
+      "grad_norm": 0.9267463548326204,
+      "learning_rate": 0.003,
+      "loss": 4.1502,
+      "step": 2641
+    },
+    {
+      "epoch": 0.02642,
+      "grad_norm": 0.9486962646703484,
+      "learning_rate": 0.003,
+      "loss": 4.1611,
+      "step": 2642
+    },
+    {
+      "epoch": 0.02643,
+      "grad_norm": 0.8844618097245053,
+      "learning_rate": 0.003,
+      "loss": 4.15,
+      "step": 2643
+    },
+    {
+      "epoch": 0.02644,
+      "grad_norm": 0.7967251454981952,
+      "learning_rate": 0.003,
+      "loss": 4.1599,
+      "step": 2644
+    },
+    {
+      "epoch": 0.02645,
+      "grad_norm": 0.8225181053423539,
+      "learning_rate": 0.003,
+      "loss": 4.1718,
+      "step": 2645
+    },
+    {
+      "epoch": 0.02646,
+      "grad_norm": 0.7987479226871795,
+      "learning_rate": 0.003,
+      "loss": 4.1374,
+      "step": 2646
+    },
+    {
+      "epoch": 0.02647,
+      "grad_norm": 0.7821642664442445,
+      "learning_rate": 0.003,
+      "loss": 4.1612,
+      "step": 2647
+    },
+    {
+      "epoch": 0.02648,
+      "grad_norm": 0.9284024850106564,
+      "learning_rate": 0.003,
+      "loss": 4.1813,
+      "step": 2648
+    },
+    {
+      "epoch": 0.02649,
+      "grad_norm": 1.0907008227907322,
+      "learning_rate": 0.003,
+      "loss": 4.1777,
+      "step": 2649
+    },
+    {
+      "epoch": 0.0265,
+      "grad_norm": 1.125793480123405,
+      "learning_rate": 0.003,
+      "loss": 4.1386,
+      "step": 2650
+    },
+    {
+      "epoch": 0.02651,
+      "grad_norm": 0.7578496969583901,
+      "learning_rate": 0.003,
+      "loss": 4.1454,
+      "step": 2651
+    },
+    {
+      "epoch": 0.02652,
+      "grad_norm": 0.7052794942111215,
+      "learning_rate": 0.003,
+      "loss": 4.1738,
+      "step": 2652
+    },
+    {
+      "epoch": 0.02653,
+      "grad_norm": 0.7103196739746442,
+      "learning_rate": 0.003,
+      "loss": 4.1293,
+      "step": 2653
+    },
+    {
+      "epoch": 0.02654,
+      "grad_norm": 0.6925385503676881,
+      "learning_rate": 0.003,
+      "loss": 4.1383,
+      "step": 2654
+    },
+    {
+      "epoch": 0.02655,
+      "grad_norm": 0.781411208540797,
+      "learning_rate": 0.003,
+      "loss": 4.147,
+      "step": 2655
+    },
+    {
+      "epoch": 0.02656,
+      "grad_norm": 0.7579190054446218,
+      "learning_rate": 0.003,
+      "loss": 4.1226,
+      "step": 2656
+    },
+    {
+      "epoch": 0.02657,
+      "grad_norm": 0.7277248228326177,
+      "learning_rate": 0.003,
+      "loss": 4.1484,
+      "step": 2657
+    },
+    {
+      "epoch": 0.02658,
+      "grad_norm": 0.6753248968688912,
+      "learning_rate": 0.003,
+      "loss": 4.1533,
+      "step": 2658
+    },
+    {
+      "epoch": 0.02659,
+      "grad_norm": 0.6796448265031177,
+      "learning_rate": 0.003,
+      "loss": 4.1465,
+      "step": 2659
+    },
+    {
+      "epoch": 0.0266,
+      "grad_norm": 0.6424181529879253,
+      "learning_rate": 0.003,
+      "loss": 4.1271,
+      "step": 2660
+    },
+    {
+      "epoch": 0.02661,
+      "grad_norm": 0.7766396799485783,
+      "learning_rate": 0.003,
+      "loss": 4.1524,
+      "step": 2661
+    },
+    {
+      "epoch": 0.02662,
+      "grad_norm": 0.864583375609625,
+      "learning_rate": 0.003,
+      "loss": 4.1433,
+      "step": 2662
+    },
+    {
+      "epoch": 0.02663,
+      "grad_norm": 0.8284021457779097,
+      "learning_rate": 0.003,
+      "loss": 4.1285,
+      "step": 2663
+    },
+    {
+      "epoch": 0.02664,
+      "grad_norm": 0.7120779790048899,
+      "learning_rate": 0.003,
+      "loss": 4.1258,
+      "step": 2664
+    },
+    {
+      "epoch": 0.02665,
+      "grad_norm": 0.6851733303412342,
+      "learning_rate": 0.003,
+      "loss": 4.1719,
+      "step": 2665
+    },
+    {
+      "epoch": 0.02666,
+      "grad_norm": 0.6758809967977054,
+      "learning_rate": 0.003,
+      "loss": 4.1627,
+      "step": 2666
+    },
+    {
+      "epoch": 0.02667,
+      "grad_norm": 0.6361286175404186,
+      "learning_rate": 0.003,
+      "loss": 4.1421,
+      "step": 2667
+    },
+    {
+      "epoch": 0.02668,
+      "grad_norm": 0.5673144703469689,
+      "learning_rate": 0.003,
+      "loss": 4.1084,
+      "step": 2668
+    },
+    {
+      "epoch": 0.02669,
+      "grad_norm": 0.5094068244293505,
+      "learning_rate": 0.003,
+      "loss": 4.1289,
+      "step": 2669
+    },
+    {
+      "epoch": 0.0267,
+      "grad_norm": 0.5324820431990631,
+      "learning_rate": 0.003,
+      "loss": 4.1272,
+      "step": 2670
+    },
+    {
+      "epoch": 0.02671,
+      "grad_norm": 0.6250352979316671,
+      "learning_rate": 0.003,
+      "loss": 4.1447,
+      "step": 2671
+    },
+    {
+      "epoch": 0.02672,
+      "grad_norm": 0.6359363422291939,
+      "learning_rate": 0.003,
+      "loss": 4.1384,
+      "step": 2672
+    },
+    {
+      "epoch": 0.02673,
+      "grad_norm": 0.6102304515358753,
+      "learning_rate": 0.003,
+      "loss": 4.1336,
+      "step": 2673
+    },
+    {
+      "epoch": 0.02674,
+      "grad_norm": 0.7766569602853689,
+      "learning_rate": 0.003,
+      "loss": 4.1082,
+      "step": 2674
+    },
+    {
+      "epoch": 0.02675,
+      "grad_norm": 0.9731715722532488,
+      "learning_rate": 0.003,
+      "loss": 4.1138,
+      "step": 2675
+    },
+    {
+      "epoch": 0.02676,
+      "grad_norm": 1.0370179032200628,
+      "learning_rate": 0.003,
+      "loss": 4.1437,
+      "step": 2676
+    },
+    {
+      "epoch": 0.02677,
+      "grad_norm": 0.7623073096555095,
+      "learning_rate": 0.003,
+      "loss": 4.1614,
+      "step": 2677
+    },
+    {
+      "epoch": 0.02678,
+      "grad_norm": 0.7777692153557114,
+      "learning_rate": 0.003,
+      "loss": 4.1586,
+      "step": 2678
+    },
+    {
+      "epoch": 0.02679,
+      "grad_norm": 0.8733324106472917,
+      "learning_rate": 0.003,
+      "loss": 4.1359,
+      "step": 2679
+    },
+    {
+      "epoch": 0.0268,
+      "grad_norm": 0.8266517145073005,
+      "learning_rate": 0.003,
+      "loss": 4.1463,
+      "step": 2680
+    },
+    {
+      "epoch": 0.02681,
+      "grad_norm": 0.758773812684903,
+      "learning_rate": 0.003,
+      "loss": 4.1643,
+      "step": 2681
+    },
+    {
+      "epoch": 0.02682,
+      "grad_norm": 0.8283252875504241,
+      "learning_rate": 0.003,
+      "loss": 4.1476,
+      "step": 2682
+    },
+    {
+      "epoch": 0.02683,
+      "grad_norm": 0.8404974594062254,
+      "learning_rate": 0.003,
+      "loss": 4.1692,
+      "step": 2683
+    },
+    {
+      "epoch": 0.02684,
+      "grad_norm": 0.818507190423211,
+      "learning_rate": 0.003,
+      "loss": 4.1427,
+      "step": 2684
+    },
+    {
+      "epoch": 0.02685,
+      "grad_norm": 0.7649702763962748,
+      "learning_rate": 0.003,
+      "loss": 4.1484,
+      "step": 2685
+    },
+    {
+      "epoch": 0.02686,
+      "grad_norm": 0.7611427665837982,
+      "learning_rate": 0.003,
+      "loss": 4.1328,
+      "step": 2686
+    },
+    {
+      "epoch": 0.02687,
+      "grad_norm": 0.9628527455690736,
+      "learning_rate": 0.003,
+      "loss": 4.1512,
+      "step": 2687
+    },
+    {
+      "epoch": 0.02688,
+      "grad_norm": 0.8564202799506135,
+      "learning_rate": 0.003,
+      "loss": 4.1277,
+      "step": 2688
+    },
+    {
+      "epoch": 0.02689,
+      "grad_norm": 0.8557838178212854,
+      "learning_rate": 0.003,
+      "loss": 4.1562,
+      "step": 2689
+    },
+    {
+      "epoch": 0.0269,
+      "grad_norm": 0.9118623509339678,
+      "learning_rate": 0.003,
+      "loss": 4.164,
+      "step": 2690
+    },
+    {
+      "epoch": 0.02691,
+      "grad_norm": 0.8301540595445771,
+      "learning_rate": 0.003,
+      "loss": 4.1545,
+      "step": 2691
+    },
+    {
+      "epoch": 0.02692,
+      "grad_norm": 0.6228920440524642,
+      "learning_rate": 0.003,
+      "loss": 4.1516,
+      "step": 2692
+    },
+    {
+      "epoch": 0.02693,
+      "grad_norm": 0.6859665731725182,
+      "learning_rate": 0.003,
+      "loss": 4.1655,
+      "step": 2693
+    },
+    {
+      "epoch": 0.02694,
+      "grad_norm": 0.7138959075984226,
+      "learning_rate": 0.003,
+      "loss": 4.1259,
+      "step": 2694
+    },
+    {
+      "epoch": 0.02695,
+      "grad_norm": 0.7527603952329793,
+      "learning_rate": 0.003,
+      "loss": 4.1438,
+      "step": 2695
+    },
+    {
+      "epoch": 0.02696,
+      "grad_norm": 0.7052829196693109,
+      "learning_rate": 0.003,
+      "loss": 4.144,
+      "step": 2696
+    },
+    {
+      "epoch": 0.02697,
+      "grad_norm": 0.6398105779035383,
+      "learning_rate": 0.003,
+      "loss": 4.1511,
+      "step": 2697
+    },
+    {
+      "epoch": 0.02698,
+      "grad_norm": 0.7194666389048138,
+      "learning_rate": 0.003,
+      "loss": 4.1368,
+      "step": 2698
+    },
+    {
+      "epoch": 0.02699,
+      "grad_norm": 0.7132487376498455,
+      "learning_rate": 0.003,
+      "loss": 4.1522,
+      "step": 2699
+    },
+    {
+      "epoch": 0.027,
+      "grad_norm": 0.656063779780492,
+      "learning_rate": 0.003,
+      "loss": 4.124,
+      "step": 2700
+    },
+    {
+      "epoch": 0.02701,
+      "grad_norm": 0.629455348788436,
+      "learning_rate": 0.003,
+      "loss": 4.1115,
+      "step": 2701
+    },
+    {
+      "epoch": 0.02702,
+      "grad_norm": 0.7466120152268881,
+      "learning_rate": 0.003,
+      "loss": 4.1658,
+      "step": 2702
+    },
+    {
+      "epoch": 0.02703,
+      "grad_norm": 0.777360526823959,
+      "learning_rate": 0.003,
+      "loss": 4.1623,
+      "step": 2703
+    },
+    {
+      "epoch": 0.02704,
+      "grad_norm": 0.8782075840804512,
+      "learning_rate": 0.003,
+      "loss": 4.11,
+      "step": 2704
+    },
+    {
+      "epoch": 0.02705,
+      "grad_norm": 0.8925534153148256,
+      "learning_rate": 0.003,
+      "loss": 4.1656,
+      "step": 2705
+    },
+    {
+      "epoch": 0.02706,
+      "grad_norm": 0.9726075369304501,
+      "learning_rate": 0.003,
+      "loss": 4.1314,
+      "step": 2706
+    },
+    {
+      "epoch": 0.02707,
+      "grad_norm": 0.9439770827114989,
+      "learning_rate": 0.003,
+      "loss": 4.1304,
+      "step": 2707
+    },
+    {
+      "epoch": 0.02708,
+      "grad_norm": 0.8038542754161909,
+      "learning_rate": 0.003,
+      "loss": 4.1613,
+      "step": 2708
+    },
+    {
+      "epoch": 0.02709,
+      "grad_norm": 0.8203620131302694,
+      "learning_rate": 0.003,
+      "loss": 4.1282,
+      "step": 2709
+    },
+    {
+      "epoch": 0.0271,
+      "grad_norm": 0.8724888263473524,
+      "learning_rate": 0.003,
+      "loss": 4.1371,
+      "step": 2710
+    },
+    {
+      "epoch": 0.02711,
+      "grad_norm": 0.8653954860429884,
+      "learning_rate": 0.003,
+      "loss": 4.1256,
+      "step": 2711
+    },
+    {
+      "epoch": 0.02712,
+      "grad_norm": 0.8020656899710369,
+      "learning_rate": 0.003,
+      "loss": 4.1343,
+      "step": 2712
+    },
+    {
+      "epoch": 0.02713,
+      "grad_norm": 0.8083253547021919,
+      "learning_rate": 0.003,
+      "loss": 4.1308,
+      "step": 2713
+    },
+    {
+      "epoch": 0.02714,
+      "grad_norm": 0.8690387355838295,
+      "learning_rate": 0.003,
+      "loss": 4.1327,
+      "step": 2714
+    },
+    {
+      "epoch": 0.02715,
+      "grad_norm": 1.0593958700668973,
+      "learning_rate": 0.003,
+      "loss": 4.1347,
+      "step": 2715
+    },
+    {
+      "epoch": 0.02716,
+      "grad_norm": 1.0574588180443436,
+      "learning_rate": 0.003,
+      "loss": 4.1532,
+      "step": 2716
+    },
+    {
+      "epoch": 0.02717,
+      "grad_norm": 0.9164750014025292,
+      "learning_rate": 0.003,
+      "loss": 4.147,
+      "step": 2717
+    },
+    {
+      "epoch": 0.02718,
+      "grad_norm": 0.9137711318115498,
+      "learning_rate": 0.003,
+      "loss": 4.1696,
+      "step": 2718
+    },
+    {
+      "epoch": 0.02719,
+      "grad_norm": 0.7747921431969758,
+      "learning_rate": 0.003,
+      "loss": 4.1371,
+      "step": 2719
+    },
+    {
+      "epoch": 0.0272,
+      "grad_norm": 0.8252071711518221,
+      "learning_rate": 0.003,
+      "loss": 4.1305,
+      "step": 2720
+    },
+    {
+      "epoch": 0.02721,
+      "grad_norm": 0.9010417786683242,
+      "learning_rate": 0.003,
+      "loss": 4.1529,
+      "step": 2721
+    },
+    {
+      "epoch": 0.02722,
+      "grad_norm": 1.0430685055558186,
+      "learning_rate": 0.003,
+      "loss": 4.1578,
+      "step": 2722
+    },
+    {
+      "epoch": 0.02723,
+      "grad_norm": 0.8338446086253062,
+      "learning_rate": 0.003,
+      "loss": 4.1591,
+      "step": 2723
+    },
+    {
+      "epoch": 0.02724,
+      "grad_norm": 0.7926892406317515,
+      "learning_rate": 0.003,
+      "loss": 4.1604,
+      "step": 2724
+    },
+    {
+      "epoch": 0.02725,
+      "grad_norm": 0.9476051066490954,
+      "learning_rate": 0.003,
+      "loss": 4.1559,
+      "step": 2725
+    },
+    {
+      "epoch": 0.02726,
+      "grad_norm": 0.9718868135312356,
+      "learning_rate": 0.003,
+      "loss": 4.1298,
+      "step": 2726
+    },
+    {
+      "epoch": 0.02727,
+      "grad_norm": 0.8794969874731017,
+      "learning_rate": 0.003,
+      "loss": 4.1606,
+      "step": 2727
+    },
+    {
+      "epoch": 0.02728,
+      "grad_norm": 0.7500620026401651,
+      "learning_rate": 0.003,
+      "loss": 4.1435,
+      "step": 2728
+    },
+    {
+      "epoch": 0.02729,
+      "grad_norm": 0.682111991166883,
+      "learning_rate": 0.003,
+      "loss": 4.172,
+      "step": 2729
+    },
+    {
+      "epoch": 0.0273,
+      "grad_norm": 0.5938416056877477,
+      "learning_rate": 0.003,
+      "loss": 4.1397,
+      "step": 2730
+    },
+    {
+      "epoch": 0.02731,
+      "grad_norm": 0.5014324118447245,
+      "learning_rate": 0.003,
+      "loss": 4.1354,
+      "step": 2731
+    },
+    {
+      "epoch": 0.02732,
+      "grad_norm": 0.5566218730286413,
+      "learning_rate": 0.003,
+      "loss": 4.1285,
+      "step": 2732
+    },
+    {
+      "epoch": 0.02733,
+      "grad_norm": 0.656514130835437,
+      "learning_rate": 0.003,
+      "loss": 4.16,
+      "step": 2733
+    },
+    {
+      "epoch": 0.02734,
+      "grad_norm": 0.7092083749191737,
+      "learning_rate": 0.003,
+      "loss": 4.1115,
+      "step": 2734
+    },
+    {
+      "epoch": 0.02735,
+      "grad_norm": 0.7490351435778966,
+      "learning_rate": 0.003,
+      "loss": 4.1581,
+      "step": 2735
+    },
+    {
+      "epoch": 0.02736,
+      "grad_norm": 0.8057410040143573,
+      "learning_rate": 0.003,
+      "loss": 4.1338,
+      "step": 2736
+    },
+    {
+      "epoch": 0.02737,
+      "grad_norm": 0.7272389058341057,
+      "learning_rate": 0.003,
+      "loss": 4.1253,
+      "step": 2737
+    },
+    {
+      "epoch": 0.02738,
+      "grad_norm": 0.7181903456270696,
+      "learning_rate": 0.003,
+      "loss": 4.1447,
+      "step": 2738
+    },
+    {
+      "epoch": 0.02739,
+      "grad_norm": 0.8790015592136898,
+      "learning_rate": 0.003,
+      "loss": 4.1427,
+      "step": 2739
+    },
+    {
+      "epoch": 0.0274,
+      "grad_norm": 1.0911668104806873,
+      "learning_rate": 0.003,
+      "loss": 4.1395,
+      "step": 2740
+    },
+    {
+      "epoch": 0.02741,
+      "grad_norm": 1.0149366352031834,
+      "learning_rate": 0.003,
+      "loss": 4.1544,
+      "step": 2741
+    },
+    {
+      "epoch": 0.02742,
+      "grad_norm": 0.8648703789133807,
+      "learning_rate": 0.003,
+      "loss": 4.1381,
+      "step": 2742
+    },
+    {
+      "epoch": 0.02743,
+      "grad_norm": 0.8509616129669461,
+      "learning_rate": 0.003,
+      "loss": 4.1613,
+      "step": 2743
+    },
+    {
+      "epoch": 0.02744,
+      "grad_norm": 0.7514267415016833,
+      "learning_rate": 0.003,
+      "loss": 4.1399,
+      "step": 2744
+    },
+    {
+      "epoch": 0.02745,
+      "grad_norm": 0.666188856053761,
+      "learning_rate": 0.003,
+      "loss": 4.1482,
+      "step": 2745
+    },
+    {
+      "epoch": 0.02746,
+      "grad_norm": 0.6204707613465867,
+      "learning_rate": 0.003,
+      "loss": 4.1227,
+      "step": 2746
+    },
+    {
+      "epoch": 0.02747,
+      "grad_norm": 0.5745978206046833,
+      "learning_rate": 0.003,
+      "loss": 4.1542,
+      "step": 2747
+    },
+    {
+      "epoch": 0.02748,
+      "grad_norm": 0.5095037008788326,
+      "learning_rate": 0.003,
+      "loss": 4.106,
+      "step": 2748
+    },
+    {
+      "epoch": 0.02749,
+      "grad_norm": 0.4918061860336399,
+      "learning_rate": 0.003,
+      "loss": 4.1075,
+      "step": 2749
+    },
+    {
+      "epoch": 0.0275,
+      "grad_norm": 0.4689633122885494,
+      "learning_rate": 0.003,
+      "loss": 4.1398,
+      "step": 2750
+    },
+    {
+      "epoch": 0.02751,
+      "grad_norm": 0.5240834818389507,
+      "learning_rate": 0.003,
+      "loss": 4.1544,
+      "step": 2751
+    },
+    {
+      "epoch": 0.02752,
+      "grad_norm": 0.45737584283965704,
+      "learning_rate": 0.003,
+      "loss": 4.1373,
+      "step": 2752
+    },
+    {
+      "epoch": 0.02753,
+      "grad_norm": 0.4854554116683414,
+      "learning_rate": 0.003,
+      "loss": 4.1197,
+      "step": 2753
+    },
+    {
+      "epoch": 0.02754,
+      "grad_norm": 0.5451937888698382,
+      "learning_rate": 0.003,
+      "loss": 4.1039,
+      "step": 2754
+    },
+    {
+      "epoch": 0.02755,
+      "grad_norm": 0.6902713768999703,
+      "learning_rate": 0.003,
+      "loss": 4.1055,
+      "step": 2755
+    },
+    {
+      "epoch": 0.02756,
+      "grad_norm": 0.9172618359251131,
+      "learning_rate": 0.003,
+      "loss": 4.1411,
+      "step": 2756
+    },
+    {
+      "epoch": 0.02757,
+      "grad_norm": 1.264093714132492,
+      "learning_rate": 0.003,
+      "loss": 4.1713,
+      "step": 2757
+    },
+    {
+      "epoch": 0.02758,
+      "grad_norm": 0.6764873385501415,
+      "learning_rate": 0.003,
+      "loss": 4.1046,
+      "step": 2758
+    },
+    {
+      "epoch": 0.02759,
+      "grad_norm": 0.6735199753323651,
+      "learning_rate": 0.003,
+      "loss": 4.1384,
+      "step": 2759
+    },
+    {
+      "epoch": 0.0276,
+      "grad_norm": 0.7869359581922244,
+      "learning_rate": 0.003,
+      "loss": 4.143,
+      "step": 2760
+    },
+    {
+      "epoch": 0.02761,
+      "grad_norm": 0.8232479826802365,
+      "learning_rate": 0.003,
+      "loss": 4.1614,
+      "step": 2761
+    },
+    {
+      "epoch": 0.02762,
+      "grad_norm": 0.8094991367198255,
+      "learning_rate": 0.003,
+      "loss": 4.1272,
+      "step": 2762
+    },
+    {
+      "epoch": 0.02763,
+      "grad_norm": 0.808838083900847,
+      "learning_rate": 0.003,
+      "loss": 4.1393,
+      "step": 2763
+    },
+    {
+      "epoch": 0.02764,
+      "grad_norm": 0.9499840304387701,
+      "learning_rate": 0.003,
+      "loss": 4.1387,
+      "step": 2764
+    },
+    {
+      "epoch": 0.02765,
+      "grad_norm": 1.1661515206508581,
+      "learning_rate": 0.003,
+      "loss": 4.1535,
+      "step": 2765
+    },
+    {
+      "epoch": 0.02766,
+      "grad_norm": 0.8834816047449258,
+      "learning_rate": 0.003,
+      "loss": 4.1143,
+      "step": 2766
+    },
+    {
+      "epoch": 0.02767,
+      "grad_norm": 0.9679955840143722,
+      "learning_rate": 0.003,
+      "loss": 4.1737,
+      "step": 2767
+    },
+    {
+      "epoch": 0.02768,
+      "grad_norm": 1.0122550261242687,
+      "learning_rate": 0.003,
+      "loss": 4.1138,
+      "step": 2768
+    },
+    {
+      "epoch": 0.02769,
+      "grad_norm": 0.936589272381051,
+      "learning_rate": 0.003,
+      "loss": 4.1722,
+      "step": 2769
+    },
+    {
+      "epoch": 0.0277,
+      "grad_norm": 0.8812342595154814,
+      "learning_rate": 0.003,
+      "loss": 4.1383,
+      "step": 2770
+    },
+    {
+      "epoch": 0.02771,
+      "grad_norm": 0.8385099378903244,
+      "learning_rate": 0.003,
+      "loss": 4.1266,
+      "step": 2771
+    },
+    {
+      "epoch": 0.02772,
+      "grad_norm": 0.8615548972194946,
+      "learning_rate": 0.003,
+      "loss": 4.1184,
+      "step": 2772
+    },
+    {
+      "epoch": 0.02773,
+      "grad_norm": 0.7470925818947043,
+      "learning_rate": 0.003,
+      "loss": 4.1495,
+      "step": 2773
+    },
+    {
+      "epoch": 0.02774,
+      "grad_norm": 0.7432890220276933,
+      "learning_rate": 0.003,
+      "loss": 4.1713,
+      "step": 2774
+    },
+    {
+      "epoch": 0.02775,
+      "grad_norm": 0.714251751808306,
+      "learning_rate": 0.003,
+      "loss": 4.1377,
+      "step": 2775
+    },
+    {
+      "epoch": 0.02776,
+      "grad_norm": 0.6696685091947796,
+      "learning_rate": 0.003,
+      "loss": 4.1315,
+      "step": 2776
+    },
+    {
+      "epoch": 0.02777,
+      "grad_norm": 0.6601977907009066,
+      "learning_rate": 0.003,
+      "loss": 4.1109,
+      "step": 2777
+    },
+    {
+      "epoch": 0.02778,
+      "grad_norm": 0.6363902696248916,
+      "learning_rate": 0.003,
+      "loss": 4.1509,
+      "step": 2778
+    },
+    {
+      "epoch": 0.02779,
+      "grad_norm": 0.7347310050156719,
+      "learning_rate": 0.003,
+      "loss": 4.126,
+      "step": 2779
+    },
+    {
+      "epoch": 0.0278,
+      "grad_norm": 0.8193092944005641,
+      "learning_rate": 0.003,
+      "loss": 4.1233,
+      "step": 2780
+    },
+    {
+      "epoch": 0.02781,
+      "grad_norm": 0.9505772704924963,
+      "learning_rate": 0.003,
+      "loss": 4.113,
+      "step": 2781
+    },
+    {
+      "epoch": 0.02782,
+      "grad_norm": 1.09253758346844,
+      "learning_rate": 0.003,
+      "loss": 4.1421,
+      "step": 2782
+    },
+    {
+      "epoch": 0.02783,
+      "grad_norm": 0.7922623009085619,
+      "learning_rate": 0.003,
+      "loss": 4.1407,
+      "step": 2783
+    },
+    {
+      "epoch": 0.02784,
+      "grad_norm": 0.7730396671741616,
+      "learning_rate": 0.003,
+      "loss": 4.136,
+      "step": 2784
+    },
+    {
+      "epoch": 0.02785,
+      "grad_norm": 0.8111585358679776,
+      "learning_rate": 0.003,
+      "loss": 4.1529,
+      "step": 2785
+    },
+    {
+      "epoch": 0.02786,
+      "grad_norm": 0.7983751779273013,
+      "learning_rate": 0.003,
+      "loss": 4.1458,
+      "step": 2786
+    },
+    {
+      "epoch": 0.02787,
+      "grad_norm": 0.8522172170925804,
+      "learning_rate": 0.003,
+      "loss": 4.1468,
+      "step": 2787
+    },
+    {
+      "epoch": 0.02788,
+      "grad_norm": 0.8952452301797756,
+      "learning_rate": 0.003,
+      "loss": 4.1379,
+      "step": 2788
+    },
+    {
+      "epoch": 0.02789,
+      "grad_norm": 0.9315897488998124,
+      "learning_rate": 0.003,
+      "loss": 4.1439,
+      "step": 2789
+    },
+    {
+      "epoch": 0.0279,
+      "grad_norm": 0.7715461952206646,
+      "learning_rate": 0.003,
+      "loss": 4.1139,
+      "step": 2790
+    },
+    {
+      "epoch": 0.02791,
+      "grad_norm": 0.7419347495262759,
+      "learning_rate": 0.003,
+      "loss": 4.1266,
+      "step": 2791
+    },
+    {
+      "epoch": 0.02792,
+      "grad_norm": 0.804466637477434,
+      "learning_rate": 0.003,
+      "loss": 4.1446,
+      "step": 2792
+    },
+    {
+      "epoch": 0.02793,
+      "grad_norm": 0.9499312139027115,
+      "learning_rate": 0.003,
+      "loss": 4.1481,
+      "step": 2793
+    },
+    {
+      "epoch": 0.02794,
+      "grad_norm": 1.079651373139261,
+      "learning_rate": 0.003,
+      "loss": 4.1503,
+      "step": 2794
+    },
+    {
+      "epoch": 0.02795,
+      "grad_norm": 0.7784790735501392,
+      "learning_rate": 0.003,
+      "loss": 4.1562,
+      "step": 2795
+    },
+    {
+      "epoch": 0.02796,
+      "grad_norm": 0.7016883329409419,
+      "learning_rate": 0.003,
+      "loss": 4.1447,
+      "step": 2796
+    },
+    {
+      "epoch": 0.02797,
+      "grad_norm": 0.8527425055953903,
+      "learning_rate": 0.003,
+      "loss": 4.1615,
+      "step": 2797
+    },
+    {
+      "epoch": 0.02798,
+      "grad_norm": 0.8344419810312547,
+      "learning_rate": 0.003,
+      "loss": 4.1599,
+      "step": 2798
+    },
+    {
+      "epoch": 0.02799,
+      "grad_norm": 0.8118333253983723,
+      "learning_rate": 0.003,
+      "loss": 4.1456,
+      "step": 2799
+    },
+    {
+      "epoch": 0.028,
+      "grad_norm": 0.8276964023163034,
+      "learning_rate": 0.003,
+      "loss": 4.154,
+      "step": 2800
+    },
+    {
+      "epoch": 0.02801,
+      "grad_norm": 0.8829595698308325,
+      "learning_rate": 0.003,
+      "loss": 4.1343,
+      "step": 2801
+    },
+    {
+      "epoch": 0.02802,
+      "grad_norm": 0.9178695537376129,
+      "learning_rate": 0.003,
+      "loss": 4.1357,
+      "step": 2802
+    },
+    {
+      "epoch": 0.02803,
+      "grad_norm": 0.773306657979122,
+      "learning_rate": 0.003,
+      "loss": 4.1566,
+      "step": 2803
+    },
+    {
+      "epoch": 0.02804,
+      "grad_norm": 0.6894758923555496,
+      "learning_rate": 0.003,
+      "loss": 4.1238,
+      "step": 2804
+    },
+    {
+      "epoch": 0.02805,
+      "grad_norm": 0.6917993567356406,
+      "learning_rate": 0.003,
+      "loss": 4.1566,
+      "step": 2805
+    },
+    {
+      "epoch": 0.02806,
+      "grad_norm": 0.6387085680780493,
+      "learning_rate": 0.003,
+      "loss": 4.1312,
+      "step": 2806
+    },
+    {
+      "epoch": 0.02807,
+      "grad_norm": 0.580517980379294,
+      "learning_rate": 0.003,
+      "loss": 4.1301,
+      "step": 2807
+    },
+    {
+      "epoch": 0.02808,
+      "grad_norm": 0.4980072648857084,
+      "learning_rate": 0.003,
+      "loss": 4.1101,
+      "step": 2808
+    },
+    {
+      "epoch": 0.02809,
+      "grad_norm": 0.5292621050422022,
+      "learning_rate": 0.003,
+      "loss": 4.1503,
+      "step": 2809
+    },
+    {
+      "epoch": 0.0281,
+      "grad_norm": 0.5662997833368786,
+      "learning_rate": 0.003,
+      "loss": 4.1045,
+      "step": 2810
+    },
+    {
+      "epoch": 0.02811,
+      "grad_norm": 0.6056304842841205,
+      "learning_rate": 0.003,
+      "loss": 4.1149,
+      "step": 2811
+    },
+    {
+      "epoch": 0.02812,
+      "grad_norm": 0.6510291438212791,
+      "learning_rate": 0.003,
+      "loss": 4.131,
+      "step": 2812
+    },
+    {
+      "epoch": 0.02813,
+      "grad_norm": 0.6856589618404986,
+      "learning_rate": 0.003,
+      "loss": 4.1212,
+      "step": 2813
+    },
+    {
+      "epoch": 0.02814,
+      "grad_norm": 0.7345790506941087,
+      "learning_rate": 0.003,
+      "loss": 4.1221,
+      "step": 2814
+    },
+    {
+      "epoch": 0.02815,
+      "grad_norm": 0.7816359381581716,
+      "learning_rate": 0.003,
+      "loss": 4.1614,
+      "step": 2815
+    },
+    {
+      "epoch": 0.02816,
+      "grad_norm": 0.8925647710841813,
+      "learning_rate": 0.003,
+      "loss": 4.1426,
+      "step": 2816
+    },
+    {
+      "epoch": 0.02817,
+      "grad_norm": 1.169228788902705,
+      "learning_rate": 0.003,
+      "loss": 4.1079,
+      "step": 2817
+    },
+    {
+      "epoch": 0.02818,
+      "grad_norm": 0.9618528564502903,
+      "learning_rate": 0.003,
+      "loss": 4.1194,
+      "step": 2818
+    },
+    {
+      "epoch": 0.02819,
+      "grad_norm": 1.0178604687945154,
+      "learning_rate": 0.003,
+      "loss": 4.15,
+      "step": 2819
+    },
+    {
+      "epoch": 0.0282,
+      "grad_norm": 0.8729007051855019,
+      "learning_rate": 0.003,
+      "loss": 4.1374,
+      "step": 2820
+    },
+    {
+      "epoch": 0.02821,
+      "grad_norm": 0.7594720998624757,
+      "learning_rate": 0.003,
+      "loss": 4.1557,
+      "step": 2821
+    },
+    {
+      "epoch": 0.02822,
+      "grad_norm": 0.7764641169305468,
+      "learning_rate": 0.003,
+      "loss": 4.1245,
+      "step": 2822
+    },
+    {
+      "epoch": 0.02823,
+      "grad_norm": 0.79863443820284,
+      "learning_rate": 0.003,
+      "loss": 4.154,
+      "step": 2823
+    },
+    {
+      "epoch": 0.02824,
+      "grad_norm": 0.7182103089348308,
+      "learning_rate": 0.003,
+      "loss": 4.1387,
+      "step": 2824
+    },
+    {
+      "epoch": 0.02825,
+      "grad_norm": 0.7529407236815215,
+      "learning_rate": 0.003,
+      "loss": 4.1425,
+      "step": 2825
+    },
+    {
+      "epoch": 0.02826,
+      "grad_norm": 0.8056220195019022,
+      "learning_rate": 0.003,
+      "loss": 4.1212,
+      "step": 2826
+    },
+    {
+      "epoch": 0.02827,
+      "grad_norm": 0.8893025905667793,
+      "learning_rate": 0.003,
+      "loss": 4.1113,
+      "step": 2827
+    },
+    {
+      "epoch": 0.02828,
+      "grad_norm": 0.9458155387571202,
+      "learning_rate": 0.003,
+      "loss": 4.1547,
+      "step": 2828
+    },
+    {
+      "epoch": 0.02829,
+      "grad_norm": 1.0816962232960836,
+      "learning_rate": 0.003,
+      "loss": 4.1852,
+      "step": 2829
+    },
+    {
+      "epoch": 0.0283,
+      "grad_norm": 0.85593992308399,
+      "learning_rate": 0.003,
+      "loss": 4.1432,
+      "step": 2830
+    },
+    {
+      "epoch": 0.02831,
+      "grad_norm": 0.8103547032302357,
+      "learning_rate": 0.003,
+      "loss": 4.1446,
+      "step": 2831
+    },
+    {
+      "epoch": 0.02832,
+      "grad_norm": 1.0004568820848851,
+      "learning_rate": 0.003,
+      "loss": 4.1611,
+      "step": 2832
+    },
+    {
+      "epoch": 0.02833,
+      "grad_norm": 1.2257623869393093,
+      "learning_rate": 0.003,
+      "loss": 4.1482,
+      "step": 2833
+    },
+    {
+      "epoch": 0.02834,
+      "grad_norm": 0.8959695287844174,
+      "learning_rate": 0.003,
+      "loss": 4.1264,
+      "step": 2834
+    },
+    {
+      "epoch": 0.02835,
+      "grad_norm": 0.9046726088007764,
+      "learning_rate": 0.003,
+      "loss": 4.1382,
+      "step": 2835
+    },
+    {
+      "epoch": 0.02836,
+      "grad_norm": 0.9388179555413667,
+      "learning_rate": 0.003,
+      "loss": 4.1634,
+      "step": 2836
+    },
+    {
+      "epoch": 0.02837,
+      "grad_norm": 0.8876864866399585,
+      "learning_rate": 0.003,
+      "loss": 4.1312,
+      "step": 2837
+    },
+    {
+      "epoch": 0.02838,
+      "grad_norm": 0.93231141838983,
+      "learning_rate": 0.003,
+      "loss": 4.1029,
+      "step": 2838
+    },
+    {
+      "epoch": 0.02839,
+      "grad_norm": 0.8358991135319658,
+      "learning_rate": 0.003,
+      "loss": 4.0981,
+      "step": 2839
+    },
+    {
+      "epoch": 0.0284,
+      "grad_norm": 0.8834561842613636,
+      "learning_rate": 0.003,
+      "loss": 4.1133,
+      "step": 2840
+    },
+    {
+      "epoch": 0.02841,
+      "grad_norm": 0.9006412651182929,
+      "learning_rate": 0.003,
+      "loss": 4.1522,
+      "step": 2841
+    },
+    {
+      "epoch": 0.02842,
+      "grad_norm": 0.9446862919101724,
+      "learning_rate": 0.003,
+      "loss": 4.1417,
+      "step": 2842
+    },
+    {
+      "epoch": 0.02843,
+      "grad_norm": 0.8098223520337203,
+      "learning_rate": 0.003,
+      "loss": 4.1599,
+      "step": 2843
+    },
+    {
+      "epoch": 0.02844,
+      "grad_norm": 0.7322010149090498,
+      "learning_rate": 0.003,
+      "loss": 4.1529,
+      "step": 2844
+    },
+    {
+      "epoch": 0.02845,
+      "grad_norm": 0.6471809615510085,
+      "learning_rate": 0.003,
+      "loss": 4.126,
+      "step": 2845
+    },
+    {
+      "epoch": 0.02846,
+      "grad_norm": 0.6878791837952257,
+      "learning_rate": 0.003,
+      "loss": 4.1218,
+      "step": 2846
+    },
+    {
+      "epoch": 0.02847,
+      "grad_norm": 0.7696793006326048,
+      "learning_rate": 0.003,
+      "loss": 4.1788,
+      "step": 2847
+    },
+    {
+      "epoch": 0.02848,
+      "grad_norm": 0.8913190904442756,
+      "learning_rate": 0.003,
+      "loss": 4.1422,
+      "step": 2848
+    },
+    {
+      "epoch": 0.02849,
+      "grad_norm": 1.018118471142102,
+      "learning_rate": 0.003,
+      "loss": 4.1327,
+      "step": 2849
+    },
+    {
+      "epoch": 0.0285,
+      "grad_norm": 0.9082110603942541,
+      "learning_rate": 0.003,
+      "loss": 4.1356,
+      "step": 2850
+    },
+    {
+      "epoch": 0.02851,
+      "grad_norm": 0.822250823175019,
+      "learning_rate": 0.003,
+      "loss": 4.1725,
+      "step": 2851
+    },
+    {
+      "epoch": 0.02852,
+      "grad_norm": 0.6052901464954267,
+      "learning_rate": 0.003,
+      "loss": 4.1424,
+      "step": 2852
+    },
+    {
+      "epoch": 0.02853,
+      "grad_norm": 0.5735422812437921,
+      "learning_rate": 0.003,
+      "loss": 4.1065,
+      "step": 2853
+    },
+    {
+      "epoch": 0.02854,
+      "grad_norm": 0.5429930679913405,
+      "learning_rate": 0.003,
+      "loss": 4.1448,
+      "step": 2854
+    },
+    {
+      "epoch": 0.02855,
+      "grad_norm": 0.5339097730360199,
+      "learning_rate": 0.003,
+      "loss": 4.1387,
+      "step": 2855
+    },
+    {
+      "epoch": 0.02856,
+      "grad_norm": 0.5860774205400787,
+      "learning_rate": 0.003,
+      "loss": 4.1428,
+      "step": 2856
+    },
+    {
+      "epoch": 0.02857,
+      "grad_norm": 0.6433890086945638,
+      "learning_rate": 0.003,
+      "loss": 4.1236,
+      "step": 2857
+    },
+    {
+      "epoch": 0.02858,
+      "grad_norm": 0.7879326106492056,
+      "learning_rate": 0.003,
+      "loss": 4.1162,
+      "step": 2858
+    },
+    {
+      "epoch": 0.02859,
+      "grad_norm": 0.8693860304588211,
+      "learning_rate": 0.003,
+      "loss": 4.1274,
+      "step": 2859
+    },
+    {
+      "epoch": 0.0286,
+      "grad_norm": 0.8079095294087945,
+      "learning_rate": 0.003,
+      "loss": 4.1417,
+      "step": 2860
+    },
+    {
+      "epoch": 0.02861,
+      "grad_norm": 0.6656764279644372,
+      "learning_rate": 0.003,
+      "loss": 4.1049,
+      "step": 2861
+    },
+    {
+      "epoch": 0.02862,
+      "grad_norm": 0.6696791424270172,
+      "learning_rate": 0.003,
+      "loss": 4.1232,
+      "step": 2862
+    },
+    {
+      "epoch": 0.02863,
+      "grad_norm": 0.7769070451321298,
+      "learning_rate": 0.003,
+      "loss": 4.1498,
+      "step": 2863
+    },
+    {
+      "epoch": 0.02864,
+      "grad_norm": 0.8224098496945286,
+      "learning_rate": 0.003,
+      "loss": 4.1241,
+      "step": 2864
+    },
+    {
+      "epoch": 0.02865,
+      "grad_norm": 0.8864904617013009,
+      "learning_rate": 0.003,
+      "loss": 4.1583,
+      "step": 2865
+    },
+    {
+      "epoch": 0.02866,
+      "grad_norm": 0.9583148110901643,
+      "learning_rate": 0.003,
+      "loss": 4.1437,
+      "step": 2866
+    },
+    {
+      "epoch": 0.02867,
+      "grad_norm": 0.8904807738138028,
+      "learning_rate": 0.003,
+      "loss": 4.1281,
+      "step": 2867
+    },
+    {
+      "epoch": 0.02868,
+      "grad_norm": 0.9293551806467387,
+      "learning_rate": 0.003,
+      "loss": 4.1391,
+      "step": 2868
+    },
+    {
+      "epoch": 0.02869,
+      "grad_norm": 1.0457657175233683,
+      "learning_rate": 0.003,
+      "loss": 4.1353,
+      "step": 2869
+    },
+    {
+      "epoch": 0.0287,
+      "grad_norm": 1.0953444171616145,
+      "learning_rate": 0.003,
+      "loss": 4.1636,
+      "step": 2870
+    },
+    {
+      "epoch": 0.02871,
+      "grad_norm": 0.7587885970460945,
+      "learning_rate": 0.003,
+      "loss": 4.1356,
+      "step": 2871
+    },
+    {
+      "epoch": 0.02872,
+      "grad_norm": 0.6777165872502906,
+      "learning_rate": 0.003,
+      "loss": 4.1681,
+      "step": 2872
+    },
+    {
+      "epoch": 0.02873,
+      "grad_norm": 0.8924775793037929,
+      "learning_rate": 0.003,
+      "loss": 4.1422,
+      "step": 2873
+    },
+    {
+      "epoch": 0.02874,
+      "grad_norm": 0.9191095226023763,
+      "learning_rate": 0.003,
+      "loss": 4.1633,
+      "step": 2874
+    },
+    {
+      "epoch": 0.02875,
+      "grad_norm": 1.0057201785085328,
+      "learning_rate": 0.003,
+      "loss": 4.1702,
+      "step": 2875
+    },
+    {
+      "epoch": 0.02876,
+      "grad_norm": 0.9084894421524936,
+      "learning_rate": 0.003,
+      "loss": 4.1504,
+      "step": 2876
+    },
+    {
+      "epoch": 0.02877,
+      "grad_norm": 0.9095891721863432,
+      "learning_rate": 0.003,
+      "loss": 4.1305,
+      "step": 2877
+    },
+    {
+      "epoch": 0.02878,
+      "grad_norm": 0.9172527890033496,
+      "learning_rate": 0.003,
+      "loss": 4.1286,
+      "step": 2878
+    },
+    {
+      "epoch": 0.02879,
+      "grad_norm": 0.8805567763002417,
+      "learning_rate": 0.003,
+      "loss": 4.1227,
+      "step": 2879
+    },
+    {
+      "epoch": 0.0288,
+      "grad_norm": 0.9996123727255034,
+      "learning_rate": 0.003,
+      "loss": 4.1332,
+      "step": 2880
+    },
+    {
+      "epoch": 0.02881,
+      "grad_norm": 1.046330847202451,
+      "learning_rate": 0.003,
+      "loss": 4.1514,
+      "step": 2881
+    },
+    {
+      "epoch": 0.02882,
+      "grad_norm": 0.772117255675699,
+      "learning_rate": 0.003,
+      "loss": 4.1606,
+      "step": 2882
+    },
+    {
+      "epoch": 0.02883,
+      "grad_norm": 0.7008407026763371,
+      "learning_rate": 0.003,
+      "loss": 4.1396,
+      "step": 2883
+    },
+    {
+      "epoch": 0.02884,
+      "grad_norm": 0.778782483892839,
+      "learning_rate": 0.003,
+      "loss": 4.177,
+      "step": 2884
+    },
+    {
+      "epoch": 0.02885,
+      "grad_norm": 0.7648524574296616,
+      "learning_rate": 0.003,
+      "loss": 4.1174,
+      "step": 2885
+    },
+    {
+      "epoch": 0.02886,
+      "grad_norm": 0.6944709668733141,
+      "learning_rate": 0.003,
+      "loss": 4.1469,
+      "step": 2886
+    },
+    {
+      "epoch": 0.02887,
+      "grad_norm": 0.6701833801380009,
+      "learning_rate": 0.003,
+      "loss": 4.1291,
+      "step": 2887
+    },
+    {
+      "epoch": 0.02888,
+      "grad_norm": 0.6522446873326756,
+      "learning_rate": 0.003,
+      "loss": 4.1256,
+      "step": 2888
+    },
+    {
+      "epoch": 0.02889,
+      "grad_norm": 0.5695684026681817,
+      "learning_rate": 0.003,
+      "loss": 4.1113,
+      "step": 2889
+    },
+    {
+      "epoch": 0.0289,
+      "grad_norm": 0.5387645552557849,
+      "learning_rate": 0.003,
+      "loss": 4.1434,
+      "step": 2890
+    },
+    {
+      "epoch": 0.02891,
+      "grad_norm": 0.6068073035716243,
+      "learning_rate": 0.003,
+      "loss": 4.1589,
+      "step": 2891
+    },
+    {
+      "epoch": 0.02892,
+      "grad_norm": 0.5642060774681064,
+      "learning_rate": 0.003,
+      "loss": 4.109,
+      "step": 2892
+    },
+    {
+      "epoch": 0.02893,
+      "grad_norm": 0.64295806093738,
+      "learning_rate": 0.003,
+      "loss": 4.1179,
+      "step": 2893
+    },
+    {
+      "epoch": 0.02894,
+      "grad_norm": 0.7755044700890192,
+      "learning_rate": 0.003,
+      "loss": 4.1282,
+      "step": 2894
+    },
+    {
+      "epoch": 0.02895,
+      "grad_norm": 0.7810569175434396,
+      "learning_rate": 0.003,
+      "loss": 4.1719,
+      "step": 2895
+    },
+    {
+      "epoch": 0.02896,
+      "grad_norm": 0.5425405989527838,
+      "learning_rate": 0.003,
+      "loss": 4.1197,
+      "step": 2896
+    },
+    {
+      "epoch": 0.02897,
+      "grad_norm": 0.469070597819537,
+      "learning_rate": 0.003,
+      "loss": 4.0995,
+      "step": 2897
+    },
+    {
+      "epoch": 0.02898,
+      "grad_norm": 0.5139238004799116,
+      "learning_rate": 0.003,
+      "loss": 4.1442,
+      "step": 2898
+    },
+    {
+      "epoch": 0.02899,
+      "grad_norm": 0.6544046694256337,
+      "learning_rate": 0.003,
+      "loss": 4.1296,
+      "step": 2899
+    },
+    {
+      "epoch": 0.029,
+      "grad_norm": 0.7180257646745092,
+      "learning_rate": 0.003,
+      "loss": 4.1219,
+      "step": 2900
+    },
+    {
+      "epoch": 0.02901,
+      "grad_norm": 0.6895214504299034,
+      "learning_rate": 0.003,
+      "loss": 4.1358,
+      "step": 2901
+    },
+    {
+      "epoch": 0.02902,
+      "grad_norm": 0.6936366493807695,
+      "learning_rate": 0.003,
+      "loss": 4.1524,
+      "step": 2902
+    },
+    {
+      "epoch": 0.02903,
+      "grad_norm": 0.7680000086672168,
+      "learning_rate": 0.003,
+      "loss": 4.1137,
+      "step": 2903
+    },
+    {
+      "epoch": 0.02904,
+      "grad_norm": 1.0523944817643214,
+      "learning_rate": 0.003,
+      "loss": 4.1402,
+      "step": 2904
+    },
+    {
+      "epoch": 0.02905,
+      "grad_norm": 1.051471635673892,
+      "learning_rate": 0.003,
+      "loss": 4.1582,
+      "step": 2905
+    },
+    {
+      "epoch": 0.02906,
+      "grad_norm": 0.8511171595587916,
+      "learning_rate": 0.003,
+      "loss": 4.1318,
+      "step": 2906
+    },
+    {
+      "epoch": 0.02907,
+      "grad_norm": 0.8224344496311664,
+      "learning_rate": 0.003,
+      "loss": 4.1093,
+      "step": 2907
+    },
+    {
+      "epoch": 0.02908,
+      "grad_norm": 0.6999459176877906,
+      "learning_rate": 0.003,
+      "loss": 4.1259,
+      "step": 2908
+    },
+    {
+      "epoch": 0.02909,
+      "grad_norm": 0.819486070194941,
+      "learning_rate": 0.003,
+      "loss": 4.1263,
+      "step": 2909
+    },
+    {
+      "epoch": 0.0291,
+      "grad_norm": 1.066905831023775,
+      "learning_rate": 0.003,
+      "loss": 4.1444,
+      "step": 2910
+    },
+    {
+      "epoch": 0.02911,
+      "grad_norm": 0.993942719913462,
+      "learning_rate": 0.003,
+      "loss": 4.1435,
+      "step": 2911
+    },
+    {
+      "epoch": 0.02912,
+      "grad_norm": 0.9395324885369105,
+      "learning_rate": 0.003,
+      "loss": 4.114,
+      "step": 2912
+    },
+    {
+      "epoch": 0.02913,
+      "grad_norm": 0.9563813373046008,
+      "learning_rate": 0.003,
+      "loss": 4.1513,
+      "step": 2913
+    },
+    {
+      "epoch": 0.02914,
+      "grad_norm": 1.204534731933291,
+      "learning_rate": 0.003,
+      "loss": 4.1464,
+      "step": 2914
+    },
+    {
+      "epoch": 0.02915,
+      "grad_norm": 0.9555386893347948,
+      "learning_rate": 0.003,
+      "loss": 4.1331,
+      "step": 2915
+    },
+    {
+      "epoch": 0.02916,
+      "grad_norm": 1.0824011254780639,
+      "learning_rate": 0.003,
+      "loss": 4.1482,
+      "step": 2916
+    },
+    {
+      "epoch": 0.02917,
+      "grad_norm": 1.0942512414631864,
+      "learning_rate": 0.003,
+      "loss": 4.1821,
+      "step": 2917
+    },
+    {
+      "epoch": 0.02918,
+      "grad_norm": 0.9071341774299415,
+      "learning_rate": 0.003,
+      "loss": 4.1353,
+      "step": 2918
+    },
+    {
+      "epoch": 0.02919,
+      "grad_norm": 0.9246562992704038,
+      "learning_rate": 0.003,
+      "loss": 4.1768,
+      "step": 2919
+    },
+    {
+      "epoch": 0.0292,
+      "grad_norm": 0.8971677038672106,
+      "learning_rate": 0.003,
+      "loss": 4.1129,
+      "step": 2920
+    },
+    {
+      "epoch": 0.02921,
+      "grad_norm": 0.926961421602634,
+      "learning_rate": 0.003,
+      "loss": 4.1553,
+      "step": 2921
+    },
+    {
+      "epoch": 0.02922,
+      "grad_norm": 1.0848027072453201,
+      "learning_rate": 0.003,
+      "loss": 4.161,
+      "step": 2922
+    },
+    {
+      "epoch": 0.02923,
+      "grad_norm": 0.912142779077174,
+      "learning_rate": 0.003,
+      "loss": 4.1303,
+      "step": 2923
+    },
+    {
+      "epoch": 0.02924,
+      "grad_norm": 0.9865161740552381,
+      "learning_rate": 0.003,
+      "loss": 4.1648,
+      "step": 2924
+    },
+    {
+      "epoch": 0.02925,
+      "grad_norm": 1.0463105360491507,
+      "learning_rate": 0.003,
+      "loss": 4.1748,
+      "step": 2925
+    },
+    {
+      "epoch": 0.02926,
+      "grad_norm": 0.9582122548230526,
+      "learning_rate": 0.003,
+      "loss": 4.1501,
+      "step": 2926
+    },
+    {
+      "epoch": 0.02927,
+      "grad_norm": 0.8403311811917105,
+      "learning_rate": 0.003,
+      "loss": 4.1524,
+      "step": 2927
+    },
+    {
+      "epoch": 0.02928,
+      "grad_norm": 0.7854536393801775,
+      "learning_rate": 0.003,
+      "loss": 4.1695,
+      "step": 2928
+    },
+    {
+      "epoch": 0.02929,
+      "grad_norm": 0.7778022366151639,
+      "learning_rate": 0.003,
+      "loss": 4.1542,
+      "step": 2929
+    },
+    {
+      "epoch": 0.0293,
+      "grad_norm": 0.7271690214257167,
+      "learning_rate": 0.003,
+      "loss": 4.1453,
+      "step": 2930
+    },
+    {
+      "epoch": 0.02931,
+      "grad_norm": 0.782379273286375,
+      "learning_rate": 0.003,
+      "loss": 4.1435,
+      "step": 2931
+    },
+    {
+      "epoch": 0.02932,
+      "grad_norm": 0.7974568484352181,
+      "learning_rate": 0.003,
+      "loss": 4.1304,
+      "step": 2932
+    },
+    {
+      "epoch": 0.02933,
+      "grad_norm": 0.7828005432560315,
+      "learning_rate": 0.003,
+      "loss": 4.1554,
+      "step": 2933
+    },
+    {
+      "epoch": 0.02934,
+      "grad_norm": 0.6614858211514127,
+      "learning_rate": 0.003,
+      "loss": 4.14,
+      "step": 2934
+    },
+    {
+      "epoch": 0.02935,
+      "grad_norm": 0.5873856004708764,
+      "learning_rate": 0.003,
+      "loss": 4.1239,
+      "step": 2935
+    },
+    {
+      "epoch": 0.02936,
+      "grad_norm": 0.578823462032298,
+      "learning_rate": 0.003,
+      "loss": 4.1409,
+      "step": 2936
+    },
+    {
+      "epoch": 0.02937,
+      "grad_norm": 0.6078930790219389,
+      "learning_rate": 0.003,
+      "loss": 4.1209,
+      "step": 2937
+    },
+    {
+      "epoch": 0.02938,
+      "grad_norm": 0.7146509387847667,
+      "learning_rate": 0.003,
+      "loss": 4.139,
+      "step": 2938
+    },
+    {
+      "epoch": 0.02939,
+      "grad_norm": 0.8250738871743942,
+      "learning_rate": 0.003,
+      "loss": 4.1295,
+      "step": 2939
+    },
+    {
+      "epoch": 0.0294,
+      "grad_norm": 0.8696648935923786,
+      "learning_rate": 0.003,
+      "loss": 4.1547,
+      "step": 2940
+    },
+    {
+      "epoch": 0.02941,
+      "grad_norm": 0.9090371360813282,
+      "learning_rate": 0.003,
+      "loss": 4.1568,
+      "step": 2941
+    },
+    {
+      "epoch": 0.02942,
+      "grad_norm": 0.8939350957264195,
+      "learning_rate": 0.003,
+      "loss": 4.1447,
+      "step": 2942
+    },
+    {
+      "epoch": 0.02943,
+      "grad_norm": 0.7023666348770743,
+      "learning_rate": 0.003,
+      "loss": 4.1276,
+      "step": 2943
+    },
+    {
+      "epoch": 0.02944,
+      "grad_norm": 0.7067049448574164,
+      "learning_rate": 0.003,
+      "loss": 4.1465,
+      "step": 2944
+    },
+    {
+      "epoch": 0.02945,
+      "grad_norm": 0.6812738232543556,
+      "learning_rate": 0.003,
+      "loss": 4.1255,
+      "step": 2945
+    },
+    {
+      "epoch": 0.02946,
+      "grad_norm": 0.6818360632430761,
+      "learning_rate": 0.003,
+      "loss": 4.1214,
+      "step": 2946
+    },
+    {
+      "epoch": 0.02947,
+      "grad_norm": 0.6754875767553201,
+      "learning_rate": 0.003,
+      "loss": 4.1523,
+      "step": 2947
+    },
+    {
+      "epoch": 0.02948,
+      "grad_norm": 0.6113651211997226,
+      "learning_rate": 0.003,
+      "loss": 4.1317,
+      "step": 2948
+    },
+    {
+      "epoch": 0.02949,
+      "grad_norm": 0.5812056588270224,
+      "learning_rate": 0.003,
+      "loss": 4.1034,
+      "step": 2949
+    },
+    {
+      "epoch": 0.0295,
+      "grad_norm": 0.6282215017847426,
+      "learning_rate": 0.003,
+      "loss": 4.1274,
+      "step": 2950
+    },
+    {
+      "epoch": 0.02951,
+      "grad_norm": 0.7817687295389203,
+      "learning_rate": 0.003,
+      "loss": 4.0846,
+      "step": 2951
+    },
+    {
+      "epoch": 0.02952,
+      "grad_norm": 1.0587479164041036,
+      "learning_rate": 0.003,
+      "loss": 4.1248,
+      "step": 2952
+    },
+    {
+      "epoch": 0.02953,
+      "grad_norm": 0.9887356887399129,
+      "learning_rate": 0.003,
+      "loss": 4.1373,
+      "step": 2953
+    },
+    {
+      "epoch": 0.02954,
+      "grad_norm": 0.7805100959975674,
+      "learning_rate": 0.003,
+      "loss": 4.1688,
+      "step": 2954
+    },
+    {
+      "epoch": 0.02955,
+      "grad_norm": 0.6651525603429118,
+      "learning_rate": 0.003,
+      "loss": 4.138,
+      "step": 2955
+    },
+    {
+      "epoch": 0.02956,
+      "grad_norm": 0.7699019886627679,
+      "learning_rate": 0.003,
+      "loss": 4.1312,
+      "step": 2956
+    },
+    {
+      "epoch": 0.02957,
+      "grad_norm": 0.816443587627682,
+      "learning_rate": 0.003,
+      "loss": 4.1404,
+      "step": 2957
+    },
+    {
+      "epoch": 0.02958,
+      "grad_norm": 0.7961877462112285,
+      "learning_rate": 0.003,
+      "loss": 4.1553,
+      "step": 2958
+    },
+    {
+      "epoch": 0.02959,
+      "grad_norm": 0.7550243768723249,
+      "learning_rate": 0.003,
+      "loss": 4.1431,
+      "step": 2959
+    },
+    {
+      "epoch": 0.0296,
+      "grad_norm": 0.7723125453491877,
+      "learning_rate": 0.003,
+      "loss": 4.1525,
+      "step": 2960
+    },
+    {
+      "epoch": 0.02961,
+      "grad_norm": 0.8583607117371619,
+      "learning_rate": 0.003,
+      "loss": 4.127,
+      "step": 2961
+    },
+    {
+      "epoch": 0.02962,
+      "grad_norm": 0.8691287903706768,
+      "learning_rate": 0.003,
+      "loss": 4.1333,
+      "step": 2962
+    },
+    {
+      "epoch": 0.02963,
+      "grad_norm": 0.8786112930172387,
+      "learning_rate": 0.003,
+      "loss": 4.1116,
+      "step": 2963
+    },
+    {
+      "epoch": 0.02964,
+      "grad_norm": 0.9167891542829973,
+      "learning_rate": 0.003,
+      "loss": 4.1027,
+      "step": 2964
+    },
+    {
+      "epoch": 0.02965,
+      "grad_norm": 0.9419888897278245,
+      "learning_rate": 0.003,
+      "loss": 4.1533,
+      "step": 2965
+    },
+    {
+      "epoch": 0.02966,
+      "grad_norm": 1.1891655776183911,
+      "learning_rate": 0.003,
+      "loss": 4.1783,
+      "step": 2966
+    },
+    {
+      "epoch": 0.02967,
+      "grad_norm": 1.0455487688038185,
+      "learning_rate": 0.003,
+      "loss": 4.1592,
+      "step": 2967
+    },
+    {
+      "epoch": 0.02968,
+      "grad_norm": 1.0164630073257412,
+      "learning_rate": 0.003,
+      "loss": 4.155,
+      "step": 2968
+    },
+    {
+      "epoch": 0.02969,
+      "grad_norm": 1.0185954831664046,
+      "learning_rate": 0.003,
+      "loss": 4.1548,
+      "step": 2969
+    },
+    {
+      "epoch": 0.0297,
+      "grad_norm": 1.043969964180144,
+      "learning_rate": 0.003,
+      "loss": 4.1421,
+      "step": 2970
+    },
+    {
+      "epoch": 0.02971,
+      "grad_norm": 0.8397020648022002,
+      "learning_rate": 0.003,
+      "loss": 4.1551,
+      "step": 2971
+    },
+    {
+      "epoch": 0.02972,
+      "grad_norm": 0.6978014599627598,
+      "learning_rate": 0.003,
+      "loss": 4.1188,
+      "step": 2972
+    },
+    {
+      "epoch": 0.02973,
+      "grad_norm": 0.7307935837042818,
+      "learning_rate": 0.003,
+      "loss": 4.1067,
+      "step": 2973
+    },
+    {
+      "epoch": 0.02974,
+      "grad_norm": 0.8618322772785041,
+      "learning_rate": 0.003,
+      "loss": 4.1462,
+      "step": 2974
+    },
+    {
+      "epoch": 0.02975,
+      "grad_norm": 0.9050029913314735,
+      "learning_rate": 0.003,
+      "loss": 4.1377,
+      "step": 2975
+    },
+    {
+      "epoch": 0.02976,
+      "grad_norm": 0.8860984382067472,
+      "learning_rate": 0.003,
+      "loss": 4.143,
+      "step": 2976
+    },
+    {
+      "epoch": 0.02977,
+      "grad_norm": 0.6971184630795387,
+      "learning_rate": 0.003,
+      "loss": 4.1269,
+      "step": 2977
+    },
+    {
+      "epoch": 0.02978,
+      "grad_norm": 0.7418889082376986,
+      "learning_rate": 0.003,
+      "loss": 4.1484,
+      "step": 2978
+    },
+    {
+      "epoch": 0.02979,
+      "grad_norm": 0.7888236204090991,
+      "learning_rate": 0.003,
+      "loss": 4.133,
+      "step": 2979
+    },
+    {
+      "epoch": 0.0298,
+      "grad_norm": 0.7867568676249742,
+      "learning_rate": 0.003,
+      "loss": 4.1354,
+      "step": 2980
+    },
+    {
+      "epoch": 0.02981,
+      "grad_norm": 0.7844697697074997,
+      "learning_rate": 0.003,
+      "loss": 4.1438,
+      "step": 2981
+    },
+    {
+      "epoch": 0.02982,
+      "grad_norm": 0.6873427232122642,
+      "learning_rate": 0.003,
+      "loss": 4.1364,
+      "step": 2982
+    },
+    {
+      "epoch": 0.02983,
+      "grad_norm": 0.7086227659108,
+      "learning_rate": 0.003,
+      "loss": 4.1508,
+      "step": 2983
+    },
+    {
+      "epoch": 0.02984,
+      "grad_norm": 0.7859850298348623,
+      "learning_rate": 0.003,
+      "loss": 4.1284,
+      "step": 2984
+    },
+    {
+      "epoch": 0.02985,
+      "grad_norm": 0.7729797989684547,
+      "learning_rate": 0.003,
+      "loss": 4.1572,
+      "step": 2985
+    },
+    {
+      "epoch": 0.02986,
+      "grad_norm": 0.6844032687054661,
+      "learning_rate": 0.003,
+      "loss": 4.1183,
+      "step": 2986
+    },
+    {
+      "epoch": 0.02987,
+      "grad_norm": 0.5762042827564682,
+      "learning_rate": 0.003,
+      "loss": 4.1051,
+      "step": 2987
+    },
+    {
+      "epoch": 0.02988,
+      "grad_norm": 0.5620617182133862,
+      "learning_rate": 0.003,
+      "loss": 4.1187,
+      "step": 2988
+    },
+    {
+      "epoch": 0.02989,
+      "grad_norm": 0.5894029162100797,
+      "learning_rate": 0.003,
+      "loss": 4.132,
+      "step": 2989
+    },
+    {
+      "epoch": 0.0299,
+      "grad_norm": 0.5837394519047691,
+      "learning_rate": 0.003,
+      "loss": 4.0884,
+      "step": 2990
+    },
+    {
+      "epoch": 0.02991,
+      "grad_norm": 0.5606386580984015,
+      "learning_rate": 0.003,
+      "loss": 4.1426,
+      "step": 2991
+    },
+    {
+      "epoch": 0.02992,
+      "grad_norm": 0.6147897819301783,
+      "learning_rate": 0.003,
+      "loss": 4.1358,
+      "step": 2992
+    },
+    {
+      "epoch": 0.02993,
+      "grad_norm": 0.6621349604754609,
+      "learning_rate": 0.003,
+      "loss": 4.1071,
+      "step": 2993
+    },
+    {
+      "epoch": 0.02994,
+      "grad_norm": 0.8082428932736804,
+      "learning_rate": 0.003,
+      "loss": 4.1211,
+      "step": 2994
+    },
+    {
+      "epoch": 0.02995,
+      "grad_norm": 0.9615508716688156,
+      "learning_rate": 0.003,
+      "loss": 4.1639,
+      "step": 2995
+    },
+    {
+      "epoch": 0.02996,
+      "grad_norm": 0.9867828831321791,
+      "learning_rate": 0.003,
+      "loss": 4.1577,
+      "step": 2996
+    },
+    {
+      "epoch": 0.02997,
+      "grad_norm": 0.6984846126670015,
+      "learning_rate": 0.003,
+      "loss": 4.112,
+      "step": 2997
+    },
+    {
+      "epoch": 0.02998,
+      "grad_norm": 0.7004936715134856,
+      "learning_rate": 0.003,
+      "loss": 4.1088,
+      "step": 2998
+    },
+    {
+      "epoch": 0.02999,
+      "grad_norm": 0.8087922821030329,
+      "learning_rate": 0.003,
+      "loss": 4.106,
+      "step": 2999
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 0.8187211297372818,
+      "learning_rate": 0.003,
+      "loss": 4.1524,
+      "step": 3000
+    },
+    {
+      "epoch": 0.03001,
+      "grad_norm": 0.7682209061752201,
+      "learning_rate": 0.003,
+      "loss": 4.1192,
+      "step": 3001
+    },
+    {
+      "epoch": 0.03002,
+      "grad_norm": 0.7895156191994268,
+      "learning_rate": 0.003,
+      "loss": 4.1308,
+      "step": 3002
+    },
+    {
+      "epoch": 0.03003,
+      "grad_norm": 0.9473169165490495,
+      "learning_rate": 0.003,
+      "loss": 4.1162,
+      "step": 3003
+    },
+    {
+      "epoch": 0.03004,
+      "grad_norm": 0.9772459835659143,
+      "learning_rate": 0.003,
+      "loss": 4.0967,
+      "step": 3004
+    },
+    {
+      "epoch": 0.03005,
+      "grad_norm": 0.9271870841526649,
+      "learning_rate": 0.003,
+      "loss": 4.1515,
+      "step": 3005
+    },
+    {
+      "epoch": 0.03006,
+      "grad_norm": 0.81719364295006,
+      "learning_rate": 0.003,
+      "loss": 4.1395,
+      "step": 3006
+    },
+    {
+      "epoch": 0.03007,
+      "grad_norm": 0.8963225985425357,
+      "learning_rate": 0.003,
+      "loss": 4.1152,
+      "step": 3007
+    },
+    {
+      "epoch": 0.03008,
+      "grad_norm": 0.7818424581508167,
+      "learning_rate": 0.003,
+      "loss": 4.1409,
+      "step": 3008
+    },
+    {
+      "epoch": 0.03009,
+      "grad_norm": 0.8175800868725163,
+      "learning_rate": 0.003,
+      "loss": 4.1174,
+      "step": 3009
+    },
+    {
+      "epoch": 0.0301,
+      "grad_norm": 0.8915634122617913,
+      "learning_rate": 0.003,
+      "loss": 4.1397,
+      "step": 3010
+    },
+    {
+      "epoch": 0.03011,
+      "grad_norm": 0.9916010748888695,
+      "learning_rate": 0.003,
+      "loss": 4.1279,
+      "step": 3011
+    },
+    {
+      "epoch": 0.03012,
+      "grad_norm": 0.9009840506519443,
+      "learning_rate": 0.003,
+      "loss": 4.1303,
+      "step": 3012
+    },
+    {
+      "epoch": 0.03013,
+      "grad_norm": 0.8990400357883044,
+      "learning_rate": 0.003,
+      "loss": 4.0956,
+      "step": 3013
+    },
+    {
+      "epoch": 0.03014,
+      "grad_norm": 0.8701803132962008,
+      "learning_rate": 0.003,
+      "loss": 4.1241,
+      "step": 3014
+    },
+    {
+      "epoch": 0.03015,
+      "grad_norm": 0.9059467708321113,
+      "learning_rate": 0.003,
+      "loss": 4.1246,
+      "step": 3015
+    },
+    {
+      "epoch": 0.03016,
+      "grad_norm": 0.8139710481414894,
+      "learning_rate": 0.003,
+      "loss": 4.1273,
+      "step": 3016
+    },
+    {
+      "epoch": 0.03017,
+      "grad_norm": 0.9003169401719128,
+      "learning_rate": 0.003,
+      "loss": 4.1309,
+      "step": 3017
+    },
+    {
+      "epoch": 0.03018,
+      "grad_norm": 0.9148332662761703,
+      "learning_rate": 0.003,
+      "loss": 4.1282,
+      "step": 3018
+    },
+    {
+      "epoch": 0.03019,
+      "grad_norm": 0.8097272342713991,
+      "learning_rate": 0.003,
+      "loss": 4.1396,
+      "step": 3019
+    },
+    {
+      "epoch": 0.0302,
+      "grad_norm": 0.8482835840133595,
+      "learning_rate": 0.003,
+      "loss": 4.1312,
+      "step": 3020
+    },
+    {
+      "epoch": 0.03021,
+      "grad_norm": 0.7875221358593745,
+      "learning_rate": 0.003,
+      "loss": 4.1522,
+      "step": 3021
+    },
+    {
+      "epoch": 0.03022,
+      "grad_norm": 0.792143621479002,
+      "learning_rate": 0.003,
+      "loss": 4.1508,
+      "step": 3022
+    },
+    {
+      "epoch": 0.03023,
+      "grad_norm": 0.8897124061645492,
+      "learning_rate": 0.003,
+      "loss": 4.1675,
+      "step": 3023
+    },
+    {
+      "epoch": 0.03024,
+      "grad_norm": 1.016846130536088,
+      "learning_rate": 0.003,
+      "loss": 4.1643,
+      "step": 3024
+    },
+    {
+      "epoch": 0.03025,
+      "grad_norm": 0.9813514493806819,
+      "learning_rate": 0.003,
+      "loss": 4.1447,
+      "step": 3025
+    },
+    {
+      "epoch": 0.03026,
+      "grad_norm": 0.9856440178637326,
+      "learning_rate": 0.003,
+      "loss": 4.119,
+      "step": 3026
+    },
+    {
+      "epoch": 0.03027,
+      "grad_norm": 1.0170462610037305,
+      "learning_rate": 0.003,
+      "loss": 4.1467,
+      "step": 3027
+    },
+    {
+      "epoch": 0.03028,
+      "grad_norm": 0.9963929882829512,
+      "learning_rate": 0.003,
+      "loss": 4.1061,
+      "step": 3028
+    },
+    {
+      "epoch": 0.03029,
+      "grad_norm": 1.1230470374601593,
+      "learning_rate": 0.003,
+      "loss": 4.1539,
+      "step": 3029
+    },
+    {
+      "epoch": 0.0303,
+      "grad_norm": 0.9952469868078255,
+      "learning_rate": 0.003,
+      "loss": 4.1371,
+      "step": 3030
+    },
+    {
+      "epoch": 0.03031,
+      "grad_norm": 0.7974194139158707,
+      "learning_rate": 0.003,
+      "loss": 4.1117,
+      "step": 3031
+    },
+    {
+      "epoch": 0.03032,
+      "grad_norm": 0.813594268367442,
+      "learning_rate": 0.003,
+      "loss": 4.1409,
+      "step": 3032
+    },
+    {
+      "epoch": 0.03033,
+      "grad_norm": 0.8000080591024091,
+      "learning_rate": 0.003,
+      "loss": 4.1323,
+      "step": 3033
+    },
+    {
+      "epoch": 0.03034,
+      "grad_norm": 0.8702456358651927,
+      "learning_rate": 0.003,
+      "loss": 4.1388,
+      "step": 3034
+    },
+    {
+      "epoch": 0.03035,
+      "grad_norm": 0.972910117224573,
+      "learning_rate": 0.003,
+      "loss": 4.1701,
+      "step": 3035
+    },
+    {
+      "epoch": 0.03036,
+      "grad_norm": 0.946589194340261,
+      "learning_rate": 0.003,
+      "loss": 4.1449,
+      "step": 3036
+    },
+    {
+      "epoch": 0.03037,
+      "grad_norm": 1.0013129497511943,
+      "learning_rate": 0.003,
+      "loss": 4.13,
+      "step": 3037
+    },
+    {
+      "epoch": 0.03038,
+      "grad_norm": 1.0504414618218458,
+      "learning_rate": 0.003,
+      "loss": 4.1374,
+      "step": 3038
+    },
+    {
+      "epoch": 0.03039,
+      "grad_norm": 0.9552159328342077,
+      "learning_rate": 0.003,
+      "loss": 4.1371,
+      "step": 3039
+    },
+    {
+      "epoch": 0.0304,
+      "grad_norm": 0.8125546290590606,
+      "learning_rate": 0.003,
+      "loss": 4.1406,
+      "step": 3040
+    },
+    {
+      "epoch": 0.03041,
+      "grad_norm": 0.7829978292483236,
+      "learning_rate": 0.003,
+      "loss": 4.142,
+      "step": 3041
+    },
+    {
+      "epoch": 0.03042,
+      "grad_norm": 0.7611930258666472,
+      "learning_rate": 0.003,
+      "loss": 4.1671,
+      "step": 3042
+    },
+    {
+      "epoch": 0.03043,
+      "grad_norm": 0.7149717720566953,
+      "learning_rate": 0.003,
+      "loss": 4.118,
+      "step": 3043
+    },
+    {
+      "epoch": 0.03044,
+      "grad_norm": 0.7624230406515938,
+      "learning_rate": 0.003,
+      "loss": 4.12,
+      "step": 3044
+    },
+    {
+      "epoch": 0.03045,
+      "grad_norm": 0.8408403467918055,
+      "learning_rate": 0.003,
+      "loss": 4.1235,
+      "step": 3045
+    },
+    {
+      "epoch": 0.03046,
+      "grad_norm": 0.7851334779365589,
+      "learning_rate": 0.003,
+      "loss": 4.1328,
+      "step": 3046
+    },
+    {
+      "epoch": 0.03047,
+      "grad_norm": 0.7122809857507961,
+      "learning_rate": 0.003,
+      "loss": 4.1374,
+      "step": 3047
+    },
+    {
+      "epoch": 0.03048,
+      "grad_norm": 0.7964799855759337,
+      "learning_rate": 0.003,
+      "loss": 4.1208,
+      "step": 3048
+    },
+    {
+      "epoch": 0.03049,
+      "grad_norm": 0.887239863638981,
+      "learning_rate": 0.003,
+      "loss": 4.1105,
+      "step": 3049
+    },
+    {
+      "epoch": 0.0305,
+      "grad_norm": 0.9999215882595172,
+      "learning_rate": 0.003,
+      "loss": 4.1315,
+      "step": 3050
+    },
+    {
+      "epoch": 0.03051,
+      "grad_norm": 1.0055098406963119,
+      "learning_rate": 0.003,
+      "loss": 4.1455,
+      "step": 3051
+    },
+    {
+      "epoch": 0.03052,
+      "grad_norm": 0.9173091708203194,
+      "learning_rate": 0.003,
+      "loss": 4.146,
+      "step": 3052
+    },
+    {
+      "epoch": 0.03053,
+      "grad_norm": 0.8629407073466706,
+      "learning_rate": 0.003,
+      "loss": 4.1506,
+      "step": 3053
+    },
+    {
+      "epoch": 0.03054,
+      "grad_norm": 0.7744384974895352,
+      "learning_rate": 0.003,
+      "loss": 4.1267,
+      "step": 3054
+    },
+    {
+      "epoch": 0.03055,
+      "grad_norm": 0.6670797287560947,
+      "learning_rate": 0.003,
+      "loss": 4.1245,
+      "step": 3055
+    },
+    {
+      "epoch": 0.03056,
+      "grad_norm": 0.658467407296622,
+      "learning_rate": 0.003,
+      "loss": 4.134,
+      "step": 3056
+    },
+    {
+      "epoch": 0.03057,
+      "grad_norm": 0.6556069512553365,
+      "learning_rate": 0.003,
+      "loss": 4.1326,
+      "step": 3057
+    },
+    {
+      "epoch": 0.03058,
+      "grad_norm": 0.5717711332151091,
+      "learning_rate": 0.003,
+      "loss": 4.1029,
+      "step": 3058
+    },
+    {
+      "epoch": 0.03059,
+      "grad_norm": 0.493409526384602,
+      "learning_rate": 0.003,
+      "loss": 4.1103,
+      "step": 3059
+    },
+    {
+      "epoch": 0.0306,
+      "grad_norm": 0.5082424754021986,
+      "learning_rate": 0.003,
+      "loss": 4.0984,
+      "step": 3060
+    },
+    {
+      "epoch": 0.03061,
+      "grad_norm": 0.4898096783004193,
+      "learning_rate": 0.003,
+      "loss": 4.1212,
+      "step": 3061
+    },
+    {
+      "epoch": 0.03062,
+      "grad_norm": 0.5061719030039263,
+      "learning_rate": 0.003,
+      "loss": 4.144,
+      "step": 3062
+    },
+    {
+      "epoch": 0.03063,
+      "grad_norm": 0.6337804215976139,
+      "learning_rate": 0.003,
+      "loss": 4.0908,
+      "step": 3063
+    },
+    {
+      "epoch": 0.03064,
+      "grad_norm": 0.7363596095895648,
+      "learning_rate": 0.003,
+      "loss": 4.1252,
+      "step": 3064
+    },
+    {
+      "epoch": 0.03065,
+      "grad_norm": 0.7804431388454032,
+      "learning_rate": 0.003,
+      "loss": 4.0953,
+      "step": 3065
+    },
+    {
+      "epoch": 0.03066,
+      "grad_norm": 0.8048441855814717,
+      "learning_rate": 0.003,
+      "loss": 4.14,
+      "step": 3066
+    },
+    {
+      "epoch": 0.03067,
+      "grad_norm": 0.936660465754291,
+      "learning_rate": 0.003,
+      "loss": 4.1504,
+      "step": 3067
+    },
+    {
+      "epoch": 0.03068,
+      "grad_norm": 1.199111088765608,
+      "learning_rate": 0.003,
+      "loss": 4.1327,
+      "step": 3068
+    },
+    {
+      "epoch": 0.03069,
+      "grad_norm": 1.024080179977825,
+      "learning_rate": 0.003,
+      "loss": 4.1138,
+      "step": 3069
+    },
+    {
+      "epoch": 0.0307,
+      "grad_norm": 0.9087822982161698,
+      "learning_rate": 0.003,
+      "loss": 4.125,
+      "step": 3070
+    },
+    {
+      "epoch": 0.03071,
+      "grad_norm": 0.773169782429931,
+      "learning_rate": 0.003,
+      "loss": 4.121,
+      "step": 3071
+    },
+    {
+      "epoch": 0.03072,
+      "grad_norm": 0.7385627562002322,
+      "learning_rate": 0.003,
+      "loss": 4.1224,
+      "step": 3072
+    },
+    {
+      "epoch": 0.03073,
+      "grad_norm": 0.8590738611383513,
+      "learning_rate": 0.003,
+      "loss": 4.1272,
+      "step": 3073
+    },
+    {
+      "epoch": 0.03074,
+      "grad_norm": 0.7460812381201545,
+      "learning_rate": 0.003,
+      "loss": 4.1228,
+      "step": 3074
+    },
+    {
+      "epoch": 0.03075,
+      "grad_norm": 0.9060557817564706,
+      "learning_rate": 0.003,
+      "loss": 4.1505,
+      "step": 3075
+    },
+    {
+      "epoch": 0.03076,
+      "grad_norm": 0.9352871058483179,
+      "learning_rate": 0.003,
+      "loss": 4.1495,
+      "step": 3076
+    },
+    {
+      "epoch": 0.03077,
+      "grad_norm": 0.8648002758853153,
+      "learning_rate": 0.003,
+      "loss": 4.1446,
+      "step": 3077
+    },
+    {
+      "epoch": 0.03078,
+      "grad_norm": 0.7596804078061197,
+      "learning_rate": 0.003,
+      "loss": 4.1244,
+      "step": 3078
+    },
+    {
+      "epoch": 0.03079,
+      "grad_norm": 0.7757175523238533,
+      "learning_rate": 0.003,
+      "loss": 4.1133,
+      "step": 3079
+    },
+    {
+      "epoch": 0.0308,
+      "grad_norm": 0.8209364936522558,
+      "learning_rate": 0.003,
+      "loss": 4.1425,
+      "step": 3080
+    },
+    {
+      "epoch": 0.03081,
+      "grad_norm": 0.8477812326348159,
+      "learning_rate": 0.003,
+      "loss": 4.1152,
+      "step": 3081
+    },
+    {
+      "epoch": 0.03082,
+      "grad_norm": 0.8013577673715985,
+      "learning_rate": 0.003,
+      "loss": 4.1191,
+      "step": 3082
+    },
+    {
+      "epoch": 0.03083,
+      "grad_norm": 0.7134216891930999,
+      "learning_rate": 0.003,
+      "loss": 4.1183,
+      "step": 3083
+    },
+    {
+      "epoch": 0.03084,
+      "grad_norm": 0.7257190995117091,
+      "learning_rate": 0.003,
+      "loss": 4.1299,
+      "step": 3084
+    },
+    {
+      "epoch": 0.03085,
+      "grad_norm": 0.8088783726503086,
+      "learning_rate": 0.003,
+      "loss": 4.1259,
+      "step": 3085
+    },
+    {
+      "epoch": 0.03086,
+      "grad_norm": 0.8932889669749948,
+      "learning_rate": 0.003,
+      "loss": 4.1502,
+      "step": 3086
+    },
+    {
+      "epoch": 0.03087,
+      "grad_norm": 0.9271283171010463,
+      "learning_rate": 0.003,
+      "loss": 4.1256,
+      "step": 3087
+    },
+    {
+      "epoch": 0.03088,
+      "grad_norm": 1.0610338130344832,
+      "learning_rate": 0.003,
+      "loss": 4.1534,
+      "step": 3088
+    },
+    {
+      "epoch": 0.03089,
+      "grad_norm": 0.9082960513513103,
+      "learning_rate": 0.003,
+      "loss": 4.1137,
+      "step": 3089
+    },
+    {
+      "epoch": 0.0309,
+      "grad_norm": 0.8382419044332593,
+      "learning_rate": 0.003,
+      "loss": 4.1152,
+      "step": 3090
+    },
+    {
+      "epoch": 0.03091,
+      "grad_norm": 0.7721347908154246,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 3091
+    },
+    {
+      "epoch": 0.03092,
+      "grad_norm": 0.5770289909808661,
+      "learning_rate": 0.003,
+      "loss": 4.0988,
+      "step": 3092
+    },
+    {
+      "epoch": 0.03093,
+      "grad_norm": 0.6048583748826258,
+      "learning_rate": 0.003,
+      "loss": 4.1335,
+      "step": 3093
+    },
+    {
+      "epoch": 0.03094,
+      "grad_norm": 0.5039228690347607,
+      "learning_rate": 0.003,
+      "loss": 4.1124,
+      "step": 3094
+    },
+    {
+      "epoch": 0.03095,
+      "grad_norm": 0.4932619648402882,
+      "learning_rate": 0.003,
+      "loss": 4.1254,
+      "step": 3095
+    },
+    {
+      "epoch": 0.03096,
+      "grad_norm": 0.44699630710548827,
+      "learning_rate": 0.003,
+      "loss": 4.1011,
+      "step": 3096
+    },
+    {
+      "epoch": 0.03097,
+      "grad_norm": 0.42800774052172613,
+      "learning_rate": 0.003,
+      "loss": 4.1233,
+      "step": 3097
+    },
+    {
+      "epoch": 0.03098,
+      "grad_norm": 0.5150974229730493,
+      "learning_rate": 0.003,
+      "loss": 4.1199,
+      "step": 3098
+    },
+    {
+      "epoch": 0.03099,
+      "grad_norm": 0.6808104155412404,
+      "learning_rate": 0.003,
+      "loss": 4.1152,
+      "step": 3099
+    },
+    {
+      "epoch": 0.031,
+      "grad_norm": 1.1124941338393186,
+      "learning_rate": 0.003,
+      "loss": 4.1238,
+      "step": 3100
+    },
+    {
+      "epoch": 0.03101,
+      "grad_norm": 1.0220222334230853,
+      "learning_rate": 0.003,
+      "loss": 4.1232,
+      "step": 3101
+    },
+    {
+      "epoch": 0.03102,
+      "grad_norm": 0.6635321325690858,
+      "learning_rate": 0.003,
+      "loss": 4.1121,
+      "step": 3102
+    },
+    {
+      "epoch": 0.03103,
+      "grad_norm": 0.7879413374703326,
+      "learning_rate": 0.003,
+      "loss": 4.1018,
+      "step": 3103
+    },
+    {
+      "epoch": 0.03104,
+      "grad_norm": 1.0336622472450878,
+      "learning_rate": 0.003,
+      "loss": 4.1177,
+      "step": 3104
+    },
+    {
+      "epoch": 0.03105,
+      "grad_norm": 0.8225297439648239,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 3105
+    },
+    {
+      "epoch": 0.03106,
+      "grad_norm": 0.9200113560338348,
+      "learning_rate": 0.003,
+      "loss": 4.1106,
+      "step": 3106
+    },
+    {
+      "epoch": 0.03107,
+      "grad_norm": 0.8693837130689374,
+      "learning_rate": 0.003,
+      "loss": 4.0948,
+      "step": 3107
+    },
+    {
+      "epoch": 0.03108,
+      "grad_norm": 0.7481880861084276,
+      "learning_rate": 0.003,
+      "loss": 4.1016,
+      "step": 3108
+    },
+    {
+      "epoch": 0.03109,
+      "grad_norm": 0.7161901524718123,
+      "learning_rate": 0.003,
+      "loss": 4.1063,
+      "step": 3109
+    },
+    {
+      "epoch": 0.0311,
+      "grad_norm": 0.7717771970106354,
+      "learning_rate": 0.003,
+      "loss": 4.1301,
+      "step": 3110
+    },
+    {
+      "epoch": 0.03111,
+      "grad_norm": 0.7918589418817704,
+      "learning_rate": 0.003,
+      "loss": 4.1105,
+      "step": 3111
+    },
+    {
+      "epoch": 0.03112,
+      "grad_norm": 0.8112295219654945,
+      "learning_rate": 0.003,
+      "loss": 4.1307,
+      "step": 3112
+    },
+    {
+      "epoch": 0.03113,
+      "grad_norm": 0.8724658411048615,
+      "learning_rate": 0.003,
+      "loss": 4.1204,
+      "step": 3113
+    },
+    {
+      "epoch": 0.03114,
+      "grad_norm": 1.0040711142290948,
+      "learning_rate": 0.003,
+      "loss": 4.1346,
+      "step": 3114
+    },
+    {
+      "epoch": 0.03115,
+      "grad_norm": 1.2108105696861353,
+      "learning_rate": 0.003,
+      "loss": 4.1509,
+      "step": 3115
+    },
+    {
+      "epoch": 0.03116,
+      "grad_norm": 0.9386810822379548,
+      "learning_rate": 0.003,
+      "loss": 4.1225,
+      "step": 3116
+    },
+    {
+      "epoch": 0.03117,
+      "grad_norm": 0.7805193907050799,
+      "learning_rate": 0.003,
+      "loss": 4.1247,
+      "step": 3117
+    },
+    {
+      "epoch": 0.03118,
+      "grad_norm": 0.9100639956456111,
+      "learning_rate": 0.003,
+      "loss": 4.1304,
+      "step": 3118
+    },
+    {
+      "epoch": 0.03119,
+      "grad_norm": 0.9031494351027202,
+      "learning_rate": 0.003,
+      "loss": 4.1434,
+      "step": 3119
+    },
+    {
+      "epoch": 0.0312,
+      "grad_norm": 0.9920122086984918,
+      "learning_rate": 0.003,
+      "loss": 4.154,
+      "step": 3120
+    },
+    {
+      "epoch": 0.03121,
+      "grad_norm": 1.24679349546566,
+      "learning_rate": 0.003,
+      "loss": 4.1376,
+      "step": 3121
+    },
+    {
+      "epoch": 0.03122,
+      "grad_norm": 0.8914719736220875,
+      "learning_rate": 0.003,
+      "loss": 4.1494,
+      "step": 3122
+    },
+    {
+      "epoch": 0.03123,
+      "grad_norm": 0.7579991423334604,
+      "learning_rate": 0.003,
+      "loss": 4.1403,
+      "step": 3123
+    },
+    {
+      "epoch": 0.03124,
+      "grad_norm": 0.7621871567104618,
+      "learning_rate": 0.003,
+      "loss": 4.1182,
+      "step": 3124
+    },
+    {
+      "epoch": 0.03125,
+      "grad_norm": 0.8062917259017726,
+      "learning_rate": 0.003,
+      "loss": 4.1181,
+      "step": 3125
+    },
+    {
+      "epoch": 0.03126,
+      "grad_norm": 0.833712338155859,
+      "learning_rate": 0.003,
+      "loss": 4.1244,
+      "step": 3126
+    },
+    {
+      "epoch": 0.03127,
+      "grad_norm": 0.8998971933566047,
+      "learning_rate": 0.003,
+      "loss": 4.1339,
+      "step": 3127
+    },
+    {
+      "epoch": 0.03128,
+      "grad_norm": 0.9380372829323406,
+      "learning_rate": 0.003,
+      "loss": 4.126,
+      "step": 3128
+    },
+    {
+      "epoch": 0.03129,
+      "grad_norm": 0.9521516498255859,
+      "learning_rate": 0.003,
+      "loss": 4.1452,
+      "step": 3129
+    },
+    {
+      "epoch": 0.0313,
+      "grad_norm": 0.8951772330842159,
+      "learning_rate": 0.003,
+      "loss": 4.1496,
+      "step": 3130
+    },
+    {
+      "epoch": 0.03131,
+      "grad_norm": 0.7755621391919874,
+      "learning_rate": 0.003,
+      "loss": 4.1279,
+      "step": 3131
+    },
+    {
+      "epoch": 0.03132,
+      "grad_norm": 0.8129807814076891,
+      "learning_rate": 0.003,
+      "loss": 4.1478,
+      "step": 3132
+    },
+    {
+      "epoch": 0.03133,
+      "grad_norm": 0.9303049881214197,
+      "learning_rate": 0.003,
+      "loss": 4.1402,
+      "step": 3133
+    },
+    {
+      "epoch": 0.03134,
+      "grad_norm": 0.9415721268309895,
+      "learning_rate": 0.003,
+      "loss": 4.1412,
+      "step": 3134
+    },
+    {
+      "epoch": 0.03135,
+      "grad_norm": 0.9104183542988432,
+      "learning_rate": 0.003,
+      "loss": 4.1062,
+      "step": 3135
+    },
+    {
+      "epoch": 0.03136,
+      "grad_norm": 0.9100236222536682,
+      "learning_rate": 0.003,
+      "loss": 4.1242,
+      "step": 3136
+    },
+    {
+      "epoch": 0.03137,
+      "grad_norm": 0.8798690651639479,
+      "learning_rate": 0.003,
+      "loss": 4.1248,
+      "step": 3137
+    },
+    {
+      "epoch": 0.03138,
+      "grad_norm": 0.7903392795080382,
+      "learning_rate": 0.003,
+      "loss": 4.1249,
+      "step": 3138
+    },
+    {
+      "epoch": 0.03139,
+      "grad_norm": 0.7667246215003086,
+      "learning_rate": 0.003,
+      "loss": 4.1279,
+      "step": 3139
+    },
+    {
+      "epoch": 0.0314,
+      "grad_norm": 0.7544018295001089,
+      "learning_rate": 0.003,
+      "loss": 4.1255,
+      "step": 3140
+    },
+    {
+      "epoch": 0.03141,
+      "grad_norm": 0.6716524130054017,
+      "learning_rate": 0.003,
+      "loss": 4.154,
+      "step": 3141
+    },
+    {
+      "epoch": 0.03142,
+      "grad_norm": 0.6382378457514613,
+      "learning_rate": 0.003,
+      "loss": 4.1317,
+      "step": 3142
+    },
+    {
+      "epoch": 0.03143,
+      "grad_norm": 0.7526757290066967,
+      "learning_rate": 0.003,
+      "loss": 4.1206,
+      "step": 3143
+    },
+    {
+      "epoch": 0.03144,
+      "grad_norm": 0.938489146233584,
+      "learning_rate": 0.003,
+      "loss": 4.0931,
+      "step": 3144
+    },
+    {
+      "epoch": 0.03145,
+      "grad_norm": 1.0493538668141305,
+      "learning_rate": 0.003,
+      "loss": 4.1262,
+      "step": 3145
+    },
+    {
+      "epoch": 0.03146,
+      "grad_norm": 0.9048504750556206,
+      "learning_rate": 0.003,
+      "loss": 4.1308,
+      "step": 3146
+    },
+    {
+      "epoch": 0.03147,
+      "grad_norm": 0.9152634263998236,
+      "learning_rate": 0.003,
+      "loss": 4.1297,
+      "step": 3147
+    },
+    {
+      "epoch": 0.03148,
+      "grad_norm": 0.878607908387141,
+      "learning_rate": 0.003,
+      "loss": 4.1341,
+      "step": 3148
+    },
+    {
+      "epoch": 0.03149,
+      "grad_norm": 0.6944713264838394,
+      "learning_rate": 0.003,
+      "loss": 4.1322,
+      "step": 3149
+    },
+    {
+      "epoch": 0.0315,
+      "grad_norm": 0.6811030653109197,
+      "learning_rate": 0.003,
+      "loss": 4.1195,
+      "step": 3150
+    },
+    {
+      "epoch": 0.03151,
+      "grad_norm": 0.7166494028947639,
+      "learning_rate": 0.003,
+      "loss": 4.1292,
+      "step": 3151
+    },
+    {
+      "epoch": 0.03152,
+      "grad_norm": 0.7180332750616866,
+      "learning_rate": 0.003,
+      "loss": 4.1294,
+      "step": 3152
+    },
+    {
+      "epoch": 0.03153,
+      "grad_norm": 0.8734661321822886,
+      "learning_rate": 0.003,
+      "loss": 4.1407,
+      "step": 3153
+    },
+    {
+      "epoch": 0.03154,
+      "grad_norm": 0.9738879775883073,
+      "learning_rate": 0.003,
+      "loss": 4.1387,
+      "step": 3154
+    },
+    {
+      "epoch": 0.03155,
+      "grad_norm": 0.8618576527781467,
+      "learning_rate": 0.003,
+      "loss": 4.127,
+      "step": 3155
+    },
+    {
+      "epoch": 0.03156,
+      "grad_norm": 0.7300214083601039,
+      "learning_rate": 0.003,
+      "loss": 4.1063,
+      "step": 3156
+    },
+    {
+      "epoch": 0.03157,
+      "grad_norm": 0.6260105425384149,
+      "learning_rate": 0.003,
+      "loss": 4.1296,
+      "step": 3157
+    },
+    {
+      "epoch": 0.03158,
+      "grad_norm": 0.682384994546209,
+      "learning_rate": 0.003,
+      "loss": 4.1132,
+      "step": 3158
+    },
+    {
+      "epoch": 0.03159,
+      "grad_norm": 0.7718937672456789,
+      "learning_rate": 0.003,
+      "loss": 4.1181,
+      "step": 3159
+    },
+    {
+      "epoch": 0.0316,
+      "grad_norm": 0.6586793613223971,
+      "learning_rate": 0.003,
+      "loss": 4.1277,
+      "step": 3160
+    },
+    {
+      "epoch": 0.03161,
+      "grad_norm": 0.6187219141678427,
+      "learning_rate": 0.003,
+      "loss": 4.133,
+      "step": 3161
+    },
+    {
+      "epoch": 0.03162,
+      "grad_norm": 0.7732103677060469,
+      "learning_rate": 0.003,
+      "loss": 4.1389,
+      "step": 3162
+    },
+    {
+      "epoch": 0.03163,
+      "grad_norm": 0.9681852590593839,
+      "learning_rate": 0.003,
+      "loss": 4.129,
+      "step": 3163
+    },
+    {
+      "epoch": 0.03164,
+      "grad_norm": 0.983612124215567,
+      "learning_rate": 0.003,
+      "loss": 4.1291,
+      "step": 3164
+    },
+    {
+      "epoch": 0.03165,
+      "grad_norm": 0.8828238997897765,
+      "learning_rate": 0.003,
+      "loss": 4.0995,
+      "step": 3165
+    },
+    {
+      "epoch": 0.03166,
+      "grad_norm": 0.9082253980909,
+      "learning_rate": 0.003,
+      "loss": 4.1181,
+      "step": 3166
+    },
+    {
+      "epoch": 0.03167,
+      "grad_norm": 0.8883995374606223,
+      "learning_rate": 0.003,
+      "loss": 4.11,
+      "step": 3167
+    },
+    {
+      "epoch": 0.03168,
+      "grad_norm": 0.8814873185877611,
+      "learning_rate": 0.003,
+      "loss": 4.1367,
+      "step": 3168
+    },
+    {
+      "epoch": 0.03169,
+      "grad_norm": 0.8358140037799937,
+      "learning_rate": 0.003,
+      "loss": 4.1428,
+      "step": 3169
+    },
+    {
+      "epoch": 0.0317,
+      "grad_norm": 0.9091839519331014,
+      "learning_rate": 0.003,
+      "loss": 4.1008,
+      "step": 3170
+    },
+    {
+      "epoch": 0.03171,
+      "grad_norm": 0.7528676805145117,
+      "learning_rate": 0.003,
+      "loss": 4.1362,
+      "step": 3171
+    },
+    {
+      "epoch": 0.03172,
+      "grad_norm": 0.6184044517416146,
+      "learning_rate": 0.003,
+      "loss": 4.1235,
+      "step": 3172
+    },
+    {
+      "epoch": 0.03173,
+      "grad_norm": 0.7308731124601682,
+      "learning_rate": 0.003,
+      "loss": 4.0974,
+      "step": 3173
+    },
+    {
+      "epoch": 0.03174,
+      "grad_norm": 0.8588565114301099,
+      "learning_rate": 0.003,
+      "loss": 4.1133,
+      "step": 3174
+    },
+    {
+      "epoch": 0.03175,
+      "grad_norm": 1.2162741354980293,
+      "learning_rate": 0.003,
+      "loss": 4.1527,
+      "step": 3175
+    },
+    {
+      "epoch": 0.03176,
+      "grad_norm": 0.9397594133644137,
+      "learning_rate": 0.003,
+      "loss": 4.1148,
+      "step": 3176
+    },
+    {
+      "epoch": 0.03177,
+      "grad_norm": 0.9189809109610272,
+      "learning_rate": 0.003,
+      "loss": 4.1275,
+      "step": 3177
+    },
+    {
+      "epoch": 0.03178,
+      "grad_norm": 0.8047272875332512,
+      "learning_rate": 0.003,
+      "loss": 4.1657,
+      "step": 3178
+    },
+    {
+      "epoch": 0.03179,
+      "grad_norm": 0.932150949680668,
+      "learning_rate": 0.003,
+      "loss": 4.1268,
+      "step": 3179
+    },
+    {
+      "epoch": 0.0318,
+      "grad_norm": 0.9592877689147434,
+      "learning_rate": 0.003,
+      "loss": 4.1293,
+      "step": 3180
+    },
+    {
+      "epoch": 0.03181,
+      "grad_norm": 0.9595136242555029,
+      "learning_rate": 0.003,
+      "loss": 4.1419,
+      "step": 3181
+    },
+    {
+      "epoch": 0.03182,
+      "grad_norm": 0.9565837686401868,
+      "learning_rate": 0.003,
+      "loss": 4.142,
+      "step": 3182
+    },
+    {
+      "epoch": 0.03183,
+      "grad_norm": 0.9741938675445059,
+      "learning_rate": 0.003,
+      "loss": 4.1478,
+      "step": 3183
+    },
+    {
+      "epoch": 0.03184,
+      "grad_norm": 0.9857805137742832,
+      "learning_rate": 0.003,
+      "loss": 4.1174,
+      "step": 3184
+    },
+    {
+      "epoch": 0.03185,
+      "grad_norm": 0.9176062885733086,
+      "learning_rate": 0.003,
+      "loss": 4.1379,
+      "step": 3185
+    },
+    {
+      "epoch": 0.03186,
+      "grad_norm": 0.8884334076590751,
+      "learning_rate": 0.003,
+      "loss": 4.1367,
+      "step": 3186
+    },
+    {
+      "epoch": 0.03187,
+      "grad_norm": 0.8658125563673901,
+      "learning_rate": 0.003,
+      "loss": 4.0961,
+      "step": 3187
+    },
+    {
+      "epoch": 0.03188,
+      "grad_norm": 0.8999051178552305,
+      "learning_rate": 0.003,
+      "loss": 4.1334,
+      "step": 3188
+    },
+    {
+      "epoch": 0.03189,
+      "grad_norm": 0.7859126574348619,
+      "learning_rate": 0.003,
+      "loss": 4.1212,
+      "step": 3189
+    },
+    {
+      "epoch": 0.0319,
+      "grad_norm": 0.8606531029730619,
+      "learning_rate": 0.003,
+      "loss": 4.1147,
+      "step": 3190
+    },
+    {
+      "epoch": 0.03191,
+      "grad_norm": 0.9918147629707751,
+      "learning_rate": 0.003,
+      "loss": 4.149,
+      "step": 3191
+    },
+    {
+      "epoch": 0.03192,
+      "grad_norm": 1.0056390969187974,
+      "learning_rate": 0.003,
+      "loss": 4.132,
+      "step": 3192
+    },
+    {
+      "epoch": 0.03193,
+      "grad_norm": 0.8860275310714892,
+      "learning_rate": 0.003,
+      "loss": 4.1258,
+      "step": 3193
+    },
+    {
+      "epoch": 0.03194,
+      "grad_norm": 0.9173243755728898,
+      "learning_rate": 0.003,
+      "loss": 4.1534,
+      "step": 3194
+    },
+    {
+      "epoch": 0.03195,
+      "grad_norm": 0.8392077257777418,
+      "learning_rate": 0.003,
+      "loss": 4.1203,
+      "step": 3195
+    },
+    {
+      "epoch": 0.03196,
+      "grad_norm": 0.7352847590623333,
+      "learning_rate": 0.003,
+      "loss": 4.1274,
+      "step": 3196
+    },
+    {
+      "epoch": 0.03197,
+      "grad_norm": 0.715497912918627,
+      "learning_rate": 0.003,
+      "loss": 4.0961,
+      "step": 3197
+    },
+    {
+      "epoch": 0.03198,
+      "grad_norm": 0.6376443032868672,
+      "learning_rate": 0.003,
+      "loss": 4.1222,
+      "step": 3198
+    },
+    {
+      "epoch": 0.03199,
+      "grad_norm": 0.5811704510301426,
+      "learning_rate": 0.003,
+      "loss": 4.0959,
+      "step": 3199
+    },
+    {
+      "epoch": 0.032,
+      "grad_norm": 0.5381272704276707,
+      "learning_rate": 0.003,
+      "loss": 4.116,
+      "step": 3200
+    },
+    {
+      "epoch": 0.03201,
+      "grad_norm": 0.5120575482476429,
+      "learning_rate": 0.003,
+      "loss": 4.0758,
+      "step": 3201
+    },
+    {
+      "epoch": 0.03202,
+      "grad_norm": 0.5958579649120576,
+      "learning_rate": 0.003,
+      "loss": 4.1164,
+      "step": 3202
+    },
+    {
+      "epoch": 0.03203,
+      "grad_norm": 0.7890366994930119,
+      "learning_rate": 0.003,
+      "loss": 4.1041,
+      "step": 3203
+    },
+    {
+      "epoch": 0.03204,
+      "grad_norm": 0.9077676416657651,
+      "learning_rate": 0.003,
+      "loss": 4.1246,
+      "step": 3204
+    },
+    {
+      "epoch": 0.03205,
+      "grad_norm": 0.754000654803963,
+      "learning_rate": 0.003,
+      "loss": 4.1157,
+      "step": 3205
+    },
+    {
+      "epoch": 0.03206,
+      "grad_norm": 0.600502725716816,
+      "learning_rate": 0.003,
+      "loss": 4.1078,
+      "step": 3206
+    },
+    {
+      "epoch": 0.03207,
+      "grad_norm": 0.6048683504208053,
+      "learning_rate": 0.003,
+      "loss": 4.0876,
+      "step": 3207
+    },
+    {
+      "epoch": 0.03208,
+      "grad_norm": 0.7129517255787963,
+      "learning_rate": 0.003,
+      "loss": 4.1136,
+      "step": 3208
+    },
+    {
+      "epoch": 0.03209,
+      "grad_norm": 0.8158420961472652,
+      "learning_rate": 0.003,
+      "loss": 4.1516,
+      "step": 3209
+    },
+    {
+      "epoch": 0.0321,
+      "grad_norm": 0.7005791064338225,
+      "learning_rate": 0.003,
+      "loss": 4.1273,
+      "step": 3210
+    },
+    {
+      "epoch": 0.03211,
+      "grad_norm": 0.6098399298288999,
+      "learning_rate": 0.003,
+      "loss": 4.0984,
+      "step": 3211
+    },
+    {
+      "epoch": 0.03212,
+      "grad_norm": 0.6538561751290226,
+      "learning_rate": 0.003,
+      "loss": 4.1089,
+      "step": 3212
+    },
+    {
+      "epoch": 0.03213,
+      "grad_norm": 0.5790618052777453,
+      "learning_rate": 0.003,
+      "loss": 4.1087,
+      "step": 3213
+    },
+    {
+      "epoch": 0.03214,
+      "grad_norm": 0.6875003076630353,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 3214
+    },
+    {
+      "epoch": 0.03215,
+      "grad_norm": 0.799314724709517,
+      "learning_rate": 0.003,
+      "loss": 4.0949,
+      "step": 3215
+    },
+    {
+      "epoch": 0.03216,
+      "grad_norm": 0.8890107745271224,
+      "learning_rate": 0.003,
+      "loss": 4.1075,
+      "step": 3216
+    },
+    {
+      "epoch": 0.03217,
+      "grad_norm": 0.7725727639400009,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 3217
+    },
+    {
+      "epoch": 0.03218,
+      "grad_norm": 0.7089931384963014,
+      "learning_rate": 0.003,
+      "loss": 4.096,
+      "step": 3218
+    },
+    {
+      "epoch": 0.03219,
+      "grad_norm": 0.755051345678096,
+      "learning_rate": 0.003,
+      "loss": 4.1025,
+      "step": 3219
+    },
+    {
+      "epoch": 0.0322,
+      "grad_norm": 0.9521310432872768,
+      "learning_rate": 0.003,
+      "loss": 4.1255,
+      "step": 3220
+    },
+    {
+      "epoch": 0.03221,
+      "grad_norm": 0.8616808278278768,
+      "learning_rate": 0.003,
+      "loss": 4.1285,
+      "step": 3221
+    },
+    {
+      "epoch": 0.03222,
+      "grad_norm": 0.873993324764959,
+      "learning_rate": 0.003,
+      "loss": 4.1188,
+      "step": 3222
+    },
+    {
+      "epoch": 0.03223,
+      "grad_norm": 0.8264507288043698,
+      "learning_rate": 0.003,
+      "loss": 4.1232,
+      "step": 3223
+    },
+    {
+      "epoch": 0.03224,
+      "grad_norm": 0.8289275480200801,
+      "learning_rate": 0.003,
+      "loss": 4.0847,
+      "step": 3224
+    },
+    {
+      "epoch": 0.03225,
+      "grad_norm": 0.7514528620660997,
+      "learning_rate": 0.003,
+      "loss": 4.1167,
+      "step": 3225
+    },
+    {
+      "epoch": 0.03226,
+      "grad_norm": 0.7197038673889892,
+      "learning_rate": 0.003,
+      "loss": 4.0974,
+      "step": 3226
+    },
+    {
+      "epoch": 0.03227,
+      "grad_norm": 0.8892719165128083,
+      "learning_rate": 0.003,
+      "loss": 4.1198,
+      "step": 3227
+    },
+    {
+      "epoch": 0.03228,
+      "grad_norm": 0.9570294472379692,
+      "learning_rate": 0.003,
+      "loss": 4.1042,
+      "step": 3228
+    },
+    {
+      "epoch": 0.03229,
+      "grad_norm": 0.9375751530879919,
+      "learning_rate": 0.003,
+      "loss": 4.12,
+      "step": 3229
+    },
+    {
+      "epoch": 0.0323,
+      "grad_norm": 0.9755322805010285,
+      "learning_rate": 0.003,
+      "loss": 4.1181,
+      "step": 3230
+    },
+    {
+      "epoch": 0.03231,
+      "grad_norm": 1.2740697032530537,
+      "learning_rate": 0.003,
+      "loss": 4.1456,
+      "step": 3231
+    },
+    {
+      "epoch": 0.03232,
+      "grad_norm": 0.8205568087154713,
+      "learning_rate": 0.003,
+      "loss": 4.1234,
+      "step": 3232
+    },
+    {
+      "epoch": 0.03233,
+      "grad_norm": 0.7719838274157066,
+      "learning_rate": 0.003,
+      "loss": 4.1376,
+      "step": 3233
+    },
+    {
+      "epoch": 0.03234,
+      "grad_norm": 0.7880067935874491,
+      "learning_rate": 0.003,
+      "loss": 4.1178,
+      "step": 3234
+    },
+    {
+      "epoch": 0.03235,
+      "grad_norm": 0.8025776008313478,
+      "learning_rate": 0.003,
+      "loss": 4.1267,
+      "step": 3235
+    },
+    {
+      "epoch": 0.03236,
+      "grad_norm": 1.0153689767602323,
+      "learning_rate": 0.003,
+      "loss": 4.124,
+      "step": 3236
+    },
+    {
+      "epoch": 0.03237,
+      "grad_norm": 1.0960896554896313,
+      "learning_rate": 0.003,
+      "loss": 4.1491,
+      "step": 3237
+    },
+    {
+      "epoch": 0.03238,
+      "grad_norm": 0.9653438379897337,
+      "learning_rate": 0.003,
+      "loss": 4.1538,
+      "step": 3238
+    },
+    {
+      "epoch": 0.03239,
+      "grad_norm": 0.9477216778661465,
+      "learning_rate": 0.003,
+      "loss": 4.1309,
+      "step": 3239
+    },
+    {
+      "epoch": 0.0324,
+      "grad_norm": 1.0000644526629416,
+      "learning_rate": 0.003,
+      "loss": 4.1522,
+      "step": 3240
+    },
+    {
+      "epoch": 0.03241,
+      "grad_norm": 0.9623250673117251,
+      "learning_rate": 0.003,
+      "loss": 4.1424,
+      "step": 3241
+    },
+    {
+      "epoch": 0.03242,
+      "grad_norm": 1.00080805991717,
+      "learning_rate": 0.003,
+      "loss": 4.1214,
+      "step": 3242
+    },
+    {
+      "epoch": 0.03243,
+      "grad_norm": 0.864922721509246,
+      "learning_rate": 0.003,
+      "loss": 4.1035,
+      "step": 3243
+    },
+    {
+      "epoch": 0.03244,
+      "grad_norm": 0.7896866879394736,
+      "learning_rate": 0.003,
+      "loss": 4.13,
+      "step": 3244
+    },
+    {
+      "epoch": 0.03245,
+      "grad_norm": 0.723475186710102,
+      "learning_rate": 0.003,
+      "loss": 4.1345,
+      "step": 3245
+    },
+    {
+      "epoch": 0.03246,
+      "grad_norm": 0.6786037554737262,
+      "learning_rate": 0.003,
+      "loss": 4.1199,
+      "step": 3246
+    },
+    {
+      "epoch": 0.03247,
+      "grad_norm": 0.6637920051672874,
+      "learning_rate": 0.003,
+      "loss": 4.1275,
+      "step": 3247
+    },
+    {
+      "epoch": 0.03248,
+      "grad_norm": 0.7161721376620257,
+      "learning_rate": 0.003,
+      "loss": 4.1439,
+      "step": 3248
+    },
+    {
+      "epoch": 0.03249,
+      "grad_norm": 0.7558986889375228,
+      "learning_rate": 0.003,
+      "loss": 4.1108,
+      "step": 3249
+    },
+    {
+      "epoch": 0.0325,
+      "grad_norm": 0.7122471010381649,
+      "learning_rate": 0.003,
+      "loss": 4.1412,
+      "step": 3250
+    },
+    {
+      "epoch": 0.03251,
+      "grad_norm": 0.7576764147111074,
+      "learning_rate": 0.003,
+      "loss": 4.1378,
+      "step": 3251
+    },
+    {
+      "epoch": 0.03252,
+      "grad_norm": 0.6662542349208606,
+      "learning_rate": 0.003,
+      "loss": 4.1022,
+      "step": 3252
+    },
+    {
+      "epoch": 0.03253,
+      "grad_norm": 0.6584169138349598,
+      "learning_rate": 0.003,
+      "loss": 4.1033,
+      "step": 3253
+    },
+    {
+      "epoch": 0.03254,
+      "grad_norm": 0.7740732689052414,
+      "learning_rate": 0.003,
+      "loss": 4.1012,
+      "step": 3254
+    },
+    {
+      "epoch": 0.03255,
+      "grad_norm": 0.9473846923391861,
+      "learning_rate": 0.003,
+      "loss": 4.1216,
+      "step": 3255
+    },
+    {
+      "epoch": 0.03256,
+      "grad_norm": 0.929085241706198,
+      "learning_rate": 0.003,
+      "loss": 4.1245,
+      "step": 3256
+    },
+    {
+      "epoch": 0.03257,
+      "grad_norm": 0.9501158349657576,
+      "learning_rate": 0.003,
+      "loss": 4.1102,
+      "step": 3257
+    },
+    {
+      "epoch": 0.03258,
+      "grad_norm": 0.9141127030789927,
+      "learning_rate": 0.003,
+      "loss": 4.1248,
+      "step": 3258
+    },
+    {
+      "epoch": 0.03259,
+      "grad_norm": 0.7903443980069681,
+      "learning_rate": 0.003,
+      "loss": 4.1197,
+      "step": 3259
+    },
+    {
+      "epoch": 0.0326,
+      "grad_norm": 0.7186252333798928,
+      "learning_rate": 0.003,
+      "loss": 4.1173,
+      "step": 3260
+    },
+    {
+      "epoch": 0.03261,
+      "grad_norm": 0.6176139155287416,
+      "learning_rate": 0.003,
+      "loss": 4.0891,
+      "step": 3261
+    },
+    {
+      "epoch": 0.03262,
+      "grad_norm": 0.5770619992009712,
+      "learning_rate": 0.003,
+      "loss": 4.1129,
+      "step": 3262
+    },
+    {
+      "epoch": 0.03263,
+      "grad_norm": 0.6179754292775074,
+      "learning_rate": 0.003,
+      "loss": 4.1218,
+      "step": 3263
+    },
+    {
+      "epoch": 0.03264,
+      "grad_norm": 0.7950184256183296,
+      "learning_rate": 0.003,
+      "loss": 4.1379,
+      "step": 3264
+    },
+    {
+      "epoch": 0.03265,
+      "grad_norm": 0.8260324885629711,
+      "learning_rate": 0.003,
+      "loss": 4.1313,
+      "step": 3265
+    },
+    {
+      "epoch": 0.03266,
+      "grad_norm": 0.9331143722743607,
+      "learning_rate": 0.003,
+      "loss": 4.0906,
+      "step": 3266
+    },
+    {
+      "epoch": 0.03267,
+      "grad_norm": 0.8951624994125074,
+      "learning_rate": 0.003,
+      "loss": 4.1009,
+      "step": 3267
+    },
+    {
+      "epoch": 0.03268,
+      "grad_norm": 0.8052096070419401,
+      "learning_rate": 0.003,
+      "loss": 4.0866,
+      "step": 3268
+    },
+    {
+      "epoch": 0.03269,
+      "grad_norm": 0.8166220426949835,
+      "learning_rate": 0.003,
+      "loss": 4.115,
+      "step": 3269
+    },
+    {
+      "epoch": 0.0327,
+      "grad_norm": 0.7018775720106071,
+      "learning_rate": 0.003,
+      "loss": 4.1382,
+      "step": 3270
+    },
+    {
+      "epoch": 0.03271,
+      "grad_norm": 0.7111861211006557,
+      "learning_rate": 0.003,
+      "loss": 4.1005,
+      "step": 3271
+    },
+    {
+      "epoch": 0.03272,
+      "grad_norm": 0.8446103418486255,
+      "learning_rate": 0.003,
+      "loss": 4.111,
+      "step": 3272
+    },
+    {
+      "epoch": 0.03273,
+      "grad_norm": 0.9056741913837429,
+      "learning_rate": 0.003,
+      "loss": 4.1151,
+      "step": 3273
+    },
+    {
+      "epoch": 0.03274,
+      "grad_norm": 0.8569785882714194,
+      "learning_rate": 0.003,
+      "loss": 4.1354,
+      "step": 3274
+    },
+    {
+      "epoch": 0.03275,
+      "grad_norm": 0.8267745539239577,
+      "learning_rate": 0.003,
+      "loss": 4.1349,
+      "step": 3275
+    },
+    {
+      "epoch": 0.03276,
+      "grad_norm": 0.8644593844634266,
+      "learning_rate": 0.003,
+      "loss": 4.1132,
+      "step": 3276
+    },
+    {
+      "epoch": 0.03277,
+      "grad_norm": 1.0535041439341994,
+      "learning_rate": 0.003,
+      "loss": 4.1198,
+      "step": 3277
+    },
+    {
+      "epoch": 0.03278,
+      "grad_norm": 0.993769735898057,
+      "learning_rate": 0.003,
+      "loss": 4.1303,
+      "step": 3278
+    },
+    {
+      "epoch": 0.03279,
+      "grad_norm": 1.1362817862452346,
+      "learning_rate": 0.003,
+      "loss": 4.1232,
+      "step": 3279
+    },
+    {
+      "epoch": 0.0328,
+      "grad_norm": 0.785895471391003,
+      "learning_rate": 0.003,
+      "loss": 4.0976,
+      "step": 3280
+    },
+    {
+      "epoch": 0.03281,
+      "grad_norm": 0.8032123764058076,
+      "learning_rate": 0.003,
+      "loss": 4.1444,
+      "step": 3281
+    },
+    {
+      "epoch": 0.03282,
+      "grad_norm": 0.9387436206649247,
+      "learning_rate": 0.003,
+      "loss": 4.103,
+      "step": 3282
+    },
+    {
+      "epoch": 0.03283,
+      "grad_norm": 1.1369187077903837,
+      "learning_rate": 0.003,
+      "loss": 4.1105,
+      "step": 3283
+    },
+    {
+      "epoch": 0.03284,
+      "grad_norm": 0.9173031886562234,
+      "learning_rate": 0.003,
+      "loss": 4.1437,
+      "step": 3284
+    },
+    {
+      "epoch": 0.03285,
+      "grad_norm": 1.0358430479898526,
+      "learning_rate": 0.003,
+      "loss": 4.0955,
+      "step": 3285
+    },
+    {
+      "epoch": 0.03286,
+      "grad_norm": 0.8561133575195394,
+      "learning_rate": 0.003,
+      "loss": 4.1319,
+      "step": 3286
+    },
+    {
+      "epoch": 0.03287,
+      "grad_norm": 0.8329432797036997,
+      "learning_rate": 0.003,
+      "loss": 4.1379,
+      "step": 3287
+    },
+    {
+      "epoch": 0.03288,
+      "grad_norm": 0.8334315378634501,
+      "learning_rate": 0.003,
+      "loss": 4.1202,
+      "step": 3288
+    },
+    {
+      "epoch": 0.03289,
+      "grad_norm": 0.6838577814234701,
+      "learning_rate": 0.003,
+      "loss": 4.1181,
+      "step": 3289
+    },
+    {
+      "epoch": 0.0329,
+      "grad_norm": 0.6781901884347332,
+      "learning_rate": 0.003,
+      "loss": 4.1462,
+      "step": 3290
+    },
+    {
+      "epoch": 0.03291,
+      "grad_norm": 0.6974761035595561,
+      "learning_rate": 0.003,
+      "loss": 4.1285,
+      "step": 3291
+    },
+    {
+      "epoch": 0.03292,
+      "grad_norm": 0.8026120729452656,
+      "learning_rate": 0.003,
+      "loss": 4.093,
+      "step": 3292
+    },
+    {
+      "epoch": 0.03293,
+      "grad_norm": 0.8409575609279364,
+      "learning_rate": 0.003,
+      "loss": 4.116,
+      "step": 3293
+    },
+    {
+      "epoch": 0.03294,
+      "grad_norm": 0.9672433152587747,
+      "learning_rate": 0.003,
+      "loss": 4.1206,
+      "step": 3294
+    },
+    {
+      "epoch": 0.03295,
+      "grad_norm": 0.952646103687869,
+      "learning_rate": 0.003,
+      "loss": 4.117,
+      "step": 3295
+    },
+    {
+      "epoch": 0.03296,
+      "grad_norm": 0.898170814180958,
+      "learning_rate": 0.003,
+      "loss": 4.1096,
+      "step": 3296
+    },
+    {
+      "epoch": 0.03297,
+      "grad_norm": 1.021226648982521,
+      "learning_rate": 0.003,
+      "loss": 4.1174,
+      "step": 3297
+    },
+    {
+      "epoch": 0.03298,
+      "grad_norm": 1.0948474288454675,
+      "learning_rate": 0.003,
+      "loss": 4.1462,
+      "step": 3298
+    },
+    {
+      "epoch": 0.03299,
+      "grad_norm": 1.0192347868788103,
+      "learning_rate": 0.003,
+      "loss": 4.1229,
+      "step": 3299
+    },
+    {
+      "epoch": 0.033,
+      "grad_norm": 1.0495723694795078,
+      "learning_rate": 0.003,
+      "loss": 4.1193,
+      "step": 3300
+    },
+    {
+      "epoch": 0.03301,
+      "grad_norm": 1.0851645957051337,
+      "learning_rate": 0.003,
+      "loss": 4.155,
+      "step": 3301
+    },
+    {
+      "epoch": 0.03302,
+      "grad_norm": 0.9231732440912226,
+      "learning_rate": 0.003,
+      "loss": 4.1229,
+      "step": 3302
+    },
+    {
+      "epoch": 0.03303,
+      "grad_norm": 0.8311956950138633,
+      "learning_rate": 0.003,
+      "loss": 4.1277,
+      "step": 3303
+    },
+    {
+      "epoch": 0.03304,
+      "grad_norm": 0.7259197244925034,
+      "learning_rate": 0.003,
+      "loss": 4.1175,
+      "step": 3304
+    },
+    {
+      "epoch": 0.03305,
+      "grad_norm": 0.6328132146169181,
+      "learning_rate": 0.003,
+      "loss": 4.1191,
+      "step": 3305
+    },
+    {
+      "epoch": 0.03306,
+      "grad_norm": 0.680683566379763,
+      "learning_rate": 0.003,
+      "loss": 4.1365,
+      "step": 3306
+    },
+    {
+      "epoch": 0.03307,
+      "grad_norm": 0.6927470498017374,
+      "learning_rate": 0.003,
+      "loss": 4.1345,
+      "step": 3307
+    },
+    {
+      "epoch": 0.03308,
+      "grad_norm": 0.6675628417853611,
+      "learning_rate": 0.003,
+      "loss": 4.1149,
+      "step": 3308
+    },
+    {
+      "epoch": 0.03309,
+      "grad_norm": 0.7202067492998604,
+      "learning_rate": 0.003,
+      "loss": 4.1005,
+      "step": 3309
+    },
+    {
+      "epoch": 0.0331,
+      "grad_norm": 0.7699244574031957,
+      "learning_rate": 0.003,
+      "loss": 4.1053,
+      "step": 3310
+    },
+    {
+      "epoch": 0.03311,
+      "grad_norm": 0.8693826092494591,
+      "learning_rate": 0.003,
+      "loss": 4.1276,
+      "step": 3311
+    },
+    {
+      "epoch": 0.03312,
+      "grad_norm": 1.172876733274895,
+      "learning_rate": 0.003,
+      "loss": 4.1076,
+      "step": 3312
+    },
+    {
+      "epoch": 0.03313,
+      "grad_norm": 0.9783748676175467,
+      "learning_rate": 0.003,
+      "loss": 4.1391,
+      "step": 3313
+    },
+    {
+      "epoch": 0.03314,
+      "grad_norm": 0.9677078544866878,
+      "learning_rate": 0.003,
+      "loss": 4.0804,
+      "step": 3314
+    },
+    {
+      "epoch": 0.03315,
+      "grad_norm": 0.8922546596507663,
+      "learning_rate": 0.003,
+      "loss": 4.131,
+      "step": 3315
+    },
+    {
+      "epoch": 0.03316,
+      "grad_norm": 0.8601480724971649,
+      "learning_rate": 0.003,
+      "loss": 4.1318,
+      "step": 3316
+    },
+    {
+      "epoch": 0.03317,
+      "grad_norm": 0.8689703079288349,
+      "learning_rate": 0.003,
+      "loss": 4.1234,
+      "step": 3317
+    },
+    {
+      "epoch": 0.03318,
+      "grad_norm": 0.8404981974903554,
+      "learning_rate": 0.003,
+      "loss": 4.1039,
+      "step": 3318
+    },
+    {
+      "epoch": 0.03319,
+      "grad_norm": 0.7816646536926355,
+      "learning_rate": 0.003,
+      "loss": 4.0965,
+      "step": 3319
+    },
+    {
+      "epoch": 0.0332,
+      "grad_norm": 0.7686980103144088,
+      "learning_rate": 0.003,
+      "loss": 4.1225,
+      "step": 3320
+    },
+    {
+      "epoch": 0.03321,
+      "grad_norm": 0.6688242043093231,
+      "learning_rate": 0.003,
+      "loss": 4.1337,
+      "step": 3321
+    },
+    {
+      "epoch": 0.03322,
+      "grad_norm": 0.6171612564014062,
+      "learning_rate": 0.003,
+      "loss": 4.0854,
+      "step": 3322
+    },
+    {
+      "epoch": 0.03323,
+      "grad_norm": 0.5970300363358997,
+      "learning_rate": 0.003,
+      "loss": 4.1264,
+      "step": 3323
+    },
+    {
+      "epoch": 0.03324,
+      "grad_norm": 0.5953328227942196,
+      "learning_rate": 0.003,
+      "loss": 4.0936,
+      "step": 3324
+    },
+    {
+      "epoch": 0.03325,
+      "grad_norm": 0.6169531392155312,
+      "learning_rate": 0.003,
+      "loss": 4.1268,
+      "step": 3325
+    },
+    {
+      "epoch": 0.03326,
+      "grad_norm": 0.7413959337275615,
+      "learning_rate": 0.003,
+      "loss": 4.0852,
+      "step": 3326
+    },
+    {
+      "epoch": 0.03327,
+      "grad_norm": 0.9232832965259093,
+      "learning_rate": 0.003,
+      "loss": 4.1215,
+      "step": 3327
+    },
+    {
+      "epoch": 0.03328,
+      "grad_norm": 1.0485483129958542,
+      "learning_rate": 0.003,
+      "loss": 4.1432,
+      "step": 3328
+    },
+    {
+      "epoch": 0.03329,
+      "grad_norm": 0.8420326285667852,
+      "learning_rate": 0.003,
+      "loss": 4.0969,
+      "step": 3329
+    },
+    {
+      "epoch": 0.0333,
+      "grad_norm": 0.7052108452174394,
+      "learning_rate": 0.003,
+      "loss": 4.1002,
+      "step": 3330
+    },
+    {
+      "epoch": 0.03331,
+      "grad_norm": 0.728092857943501,
+      "learning_rate": 0.003,
+      "loss": 4.1003,
+      "step": 3331
+    },
+    {
+      "epoch": 0.03332,
+      "grad_norm": 0.9200654034817243,
+      "learning_rate": 0.003,
+      "loss": 4.1031,
+      "step": 3332
+    },
+    {
+      "epoch": 0.03333,
+      "grad_norm": 0.9758232395574222,
+      "learning_rate": 0.003,
+      "loss": 4.0967,
+      "step": 3333
+    },
+    {
+      "epoch": 0.03334,
+      "grad_norm": 1.0419984825931974,
+      "learning_rate": 0.003,
+      "loss": 4.1301,
+      "step": 3334
+    },
+    {
+      "epoch": 0.03335,
+      "grad_norm": 0.9936868073161941,
+      "learning_rate": 0.003,
+      "loss": 4.1367,
+      "step": 3335
+    },
+    {
+      "epoch": 0.03336,
+      "grad_norm": 0.9188784917760547,
+      "learning_rate": 0.003,
+      "loss": 4.1343,
+      "step": 3336
+    },
+    {
+      "epoch": 0.03337,
+      "grad_norm": 0.9676171269128566,
+      "learning_rate": 0.003,
+      "loss": 4.113,
+      "step": 3337
+    },
+    {
+      "epoch": 0.03338,
+      "grad_norm": 1.2398133112982033,
+      "learning_rate": 0.003,
+      "loss": 4.1319,
+      "step": 3338
+    },
+    {
+      "epoch": 0.03339,
+      "grad_norm": 0.8757441136351082,
+      "learning_rate": 0.003,
+      "loss": 4.1259,
+      "step": 3339
+    },
+    {
+      "epoch": 0.0334,
+      "grad_norm": 0.803431258859875,
+      "learning_rate": 0.003,
+      "loss": 4.102,
+      "step": 3340
+    },
+    {
+      "epoch": 0.03341,
+      "grad_norm": 0.7508144599659284,
+      "learning_rate": 0.003,
+      "loss": 4.1183,
+      "step": 3341
+    },
+    {
+      "epoch": 0.03342,
+      "grad_norm": 0.6682938205970289,
+      "learning_rate": 0.003,
+      "loss": 4.1049,
+      "step": 3342
+    },
+    {
+      "epoch": 0.03343,
+      "grad_norm": 0.6286049563460856,
+      "learning_rate": 0.003,
+      "loss": 4.0788,
+      "step": 3343
+    },
+    {
+      "epoch": 0.03344,
+      "grad_norm": 0.7479131859288655,
+      "learning_rate": 0.003,
+      "loss": 4.1327,
+      "step": 3344
+    },
+    {
+      "epoch": 0.03345,
+      "grad_norm": 0.9317587885861235,
+      "learning_rate": 0.003,
+      "loss": 4.1167,
+      "step": 3345
+    },
+    {
+      "epoch": 0.03346,
+      "grad_norm": 1.1810549914532213,
+      "learning_rate": 0.003,
+      "loss": 4.1402,
+      "step": 3346
+    },
+    {
+      "epoch": 0.03347,
+      "grad_norm": 0.803326085791172,
+      "learning_rate": 0.003,
+      "loss": 4.1157,
+      "step": 3347
+    },
+    {
+      "epoch": 0.03348,
+      "grad_norm": 0.6682684868194113,
+      "learning_rate": 0.003,
+      "loss": 4.0993,
+      "step": 3348
+    },
+    {
+      "epoch": 0.03349,
+      "grad_norm": 0.7128883398636897,
+      "learning_rate": 0.003,
+      "loss": 4.1453,
+      "step": 3349
+    },
+    {
+      "epoch": 0.0335,
+      "grad_norm": 0.8166001203873546,
+      "learning_rate": 0.003,
+      "loss": 4.1007,
+      "step": 3350
+    },
+    {
+      "epoch": 0.03351,
+      "grad_norm": 0.8984169700598436,
+      "learning_rate": 0.003,
+      "loss": 4.1127,
+      "step": 3351
+    },
+    {
+      "epoch": 0.03352,
+      "grad_norm": 0.891385033781036,
+      "learning_rate": 0.003,
+      "loss": 4.1282,
+      "step": 3352
+    },
+    {
+      "epoch": 0.03353,
+      "grad_norm": 0.7436503547090926,
+      "learning_rate": 0.003,
+      "loss": 4.1246,
+      "step": 3353
+    },
+    {
+      "epoch": 0.03354,
+      "grad_norm": 0.7000314128832471,
+      "learning_rate": 0.003,
+      "loss": 4.0858,
+      "step": 3354
+    },
+    {
+      "epoch": 0.03355,
+      "grad_norm": 0.8076872538085662,
+      "learning_rate": 0.003,
+      "loss": 4.1143,
+      "step": 3355
+    },
+    {
+      "epoch": 0.03356,
+      "grad_norm": 0.9065676166442113,
+      "learning_rate": 0.003,
+      "loss": 4.141,
+      "step": 3356
+    },
+    {
+      "epoch": 0.03357,
+      "grad_norm": 0.8283445403636854,
+      "learning_rate": 0.003,
+      "loss": 4.1149,
+      "step": 3357
+    },
+    {
+      "epoch": 0.03358,
+      "grad_norm": 0.711728954112842,
+      "learning_rate": 0.003,
+      "loss": 4.1298,
+      "step": 3358
+    },
+    {
+      "epoch": 0.03359,
+      "grad_norm": 0.6700966992589463,
+      "learning_rate": 0.003,
+      "loss": 4.1152,
+      "step": 3359
+    },
+    {
+      "epoch": 0.0336,
+      "grad_norm": 0.6990021919801547,
+      "learning_rate": 0.003,
+      "loss": 4.1287,
+      "step": 3360
+    },
+    {
+      "epoch": 0.03361,
+      "grad_norm": 0.6512206263292598,
+      "learning_rate": 0.003,
+      "loss": 4.1284,
+      "step": 3361
+    },
+    {
+      "epoch": 0.03362,
+      "grad_norm": 0.6471137948913391,
+      "learning_rate": 0.003,
+      "loss": 4.1245,
+      "step": 3362
+    },
+    {
+      "epoch": 0.03363,
+      "grad_norm": 0.6486978718096742,
+      "learning_rate": 0.003,
+      "loss": 4.0746,
+      "step": 3363
+    },
+    {
+      "epoch": 0.03364,
+      "grad_norm": 0.7056083808952703,
+      "learning_rate": 0.003,
+      "loss": 4.0975,
+      "step": 3364
+    },
+    {
+      "epoch": 0.03365,
+      "grad_norm": 0.7889254390774209,
+      "learning_rate": 0.003,
+      "loss": 4.096,
+      "step": 3365
+    },
+    {
+      "epoch": 0.03366,
+      "grad_norm": 0.8858274543513762,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 3366
+    },
+    {
+      "epoch": 0.03367,
+      "grad_norm": 0.8996550932306419,
+      "learning_rate": 0.003,
+      "loss": 4.0941,
+      "step": 3367
+    },
+    {
+      "epoch": 0.03368,
+      "grad_norm": 0.9535511924749507,
+      "learning_rate": 0.003,
+      "loss": 4.1436,
+      "step": 3368
+    },
+    {
+      "epoch": 0.03369,
+      "grad_norm": 1.025280504328262,
+      "learning_rate": 0.003,
+      "loss": 4.1064,
+      "step": 3369
+    },
+    {
+      "epoch": 0.0337,
+      "grad_norm": 0.9606870288728299,
+      "learning_rate": 0.003,
+      "loss": 4.1369,
+      "step": 3370
+    },
+    {
+      "epoch": 0.03371,
+      "grad_norm": 0.911846692093779,
+      "learning_rate": 0.003,
+      "loss": 4.0718,
+      "step": 3371
+    },
+    {
+      "epoch": 0.03372,
+      "grad_norm": 0.8892950330733644,
+      "learning_rate": 0.003,
+      "loss": 4.1005,
+      "step": 3372
+    },
+    {
+      "epoch": 0.03373,
+      "grad_norm": 0.9613355807854816,
+      "learning_rate": 0.003,
+      "loss": 4.1365,
+      "step": 3373
+    },
+    {
+      "epoch": 0.03374,
+      "grad_norm": 0.9293495359333047,
+      "learning_rate": 0.003,
+      "loss": 4.1205,
+      "step": 3374
+    },
+    {
+      "epoch": 0.03375,
+      "grad_norm": 0.9945254272923405,
+      "learning_rate": 0.003,
+      "loss": 4.1112,
+      "step": 3375
+    },
+    {
+      "epoch": 0.03376,
+      "grad_norm": 0.9458311049733065,
+      "learning_rate": 0.003,
+      "loss": 4.085,
+      "step": 3376
+    },
+    {
+      "epoch": 0.03377,
+      "grad_norm": 1.0143873093094964,
+      "learning_rate": 0.003,
+      "loss": 4.1348,
+      "step": 3377
+    },
+    {
+      "epoch": 0.03378,
+      "grad_norm": 0.9867149119212901,
+      "learning_rate": 0.003,
+      "loss": 4.1415,
+      "step": 3378
+    },
+    {
+      "epoch": 0.03379,
+      "grad_norm": 0.909410152436646,
+      "learning_rate": 0.003,
+      "loss": 4.1127,
+      "step": 3379
+    },
+    {
+      "epoch": 0.0338,
+      "grad_norm": 0.9582033813869726,
+      "learning_rate": 0.003,
+      "loss": 4.1382,
+      "step": 3380
+    },
+    {
+      "epoch": 0.03381,
+      "grad_norm": 1.0515732546439378,
+      "learning_rate": 0.003,
+      "loss": 4.1483,
+      "step": 3381
+    },
+    {
+      "epoch": 0.03382,
+      "grad_norm": 0.9000543481782362,
+      "learning_rate": 0.003,
+      "loss": 4.1344,
+      "step": 3382
+    },
+    {
+      "epoch": 0.03383,
+      "grad_norm": 0.7851905547422763,
+      "learning_rate": 0.003,
+      "loss": 4.0953,
+      "step": 3383
+    },
+    {
+      "epoch": 0.03384,
+      "grad_norm": 0.7425344508528795,
+      "learning_rate": 0.003,
+      "loss": 4.1169,
+      "step": 3384
+    },
+    {
+      "epoch": 0.03385,
+      "grad_norm": 0.7242449162586684,
+      "learning_rate": 0.003,
+      "loss": 4.1158,
+      "step": 3385
+    },
+    {
+      "epoch": 0.03386,
+      "grad_norm": 0.7178134850877214,
+      "learning_rate": 0.003,
+      "loss": 4.1127,
+      "step": 3386
+    },
+    {
+      "epoch": 0.03387,
+      "grad_norm": 0.6034717016906057,
+      "learning_rate": 0.003,
+      "loss": 4.1113,
+      "step": 3387
+    },
+    {
+      "epoch": 0.03388,
+      "grad_norm": 0.6250385791221151,
+      "learning_rate": 0.003,
+      "loss": 4.1231,
+      "step": 3388
+    },
+    {
+      "epoch": 0.03389,
+      "grad_norm": 0.5925814922250926,
+      "learning_rate": 0.003,
+      "loss": 4.1382,
+      "step": 3389
+    },
+    {
+      "epoch": 0.0339,
+      "grad_norm": 0.6938565572265246,
+      "learning_rate": 0.003,
+      "loss": 4.1269,
+      "step": 3390
+    },
+    {
+      "epoch": 0.03391,
+      "grad_norm": 0.8588428651999951,
+      "learning_rate": 0.003,
+      "loss": 4.1075,
+      "step": 3391
+    },
+    {
+      "epoch": 0.03392,
+      "grad_norm": 1.0828213831544788,
+      "learning_rate": 0.003,
+      "loss": 4.1215,
+      "step": 3392
+    },
+    {
+      "epoch": 0.03393,
+      "grad_norm": 0.9139268583774075,
+      "learning_rate": 0.003,
+      "loss": 4.1312,
+      "step": 3393
+    },
+    {
+      "epoch": 0.03394,
+      "grad_norm": 0.8962526152154793,
+      "learning_rate": 0.003,
+      "loss": 4.1265,
+      "step": 3394
+    },
+    {
+      "epoch": 0.03395,
+      "grad_norm": 0.9550321739497196,
+      "learning_rate": 0.003,
+      "loss": 4.1325,
+      "step": 3395
+    },
+    {
+      "epoch": 0.03396,
+      "grad_norm": 0.9058142633647185,
+      "learning_rate": 0.003,
+      "loss": 4.1149,
+      "step": 3396
+    },
+    {
+      "epoch": 0.03397,
+      "grad_norm": 1.030198089545334,
+      "learning_rate": 0.003,
+      "loss": 4.0875,
+      "step": 3397
+    },
+    {
+      "epoch": 0.03398,
+      "grad_norm": 0.9817159965257135,
+      "learning_rate": 0.003,
+      "loss": 4.1455,
+      "step": 3398
+    },
+    {
+      "epoch": 0.03399,
+      "grad_norm": 0.7537874005656849,
+      "learning_rate": 0.003,
+      "loss": 4.1585,
+      "step": 3399
+    },
+    {
+      "epoch": 0.034,
+      "grad_norm": 0.7534398090951075,
+      "learning_rate": 0.003,
+      "loss": 4.1353,
+      "step": 3400
+    },
+    {
+      "epoch": 0.03401,
+      "grad_norm": 0.846711011216498,
+      "learning_rate": 0.003,
+      "loss": 4.1125,
+      "step": 3401
+    },
+    {
+      "epoch": 0.03402,
+      "grad_norm": 1.0304564422923,
+      "learning_rate": 0.003,
+      "loss": 4.1474,
+      "step": 3402
+    },
+    {
+      "epoch": 0.03403,
+      "grad_norm": 0.8955445182551958,
+      "learning_rate": 0.003,
+      "loss": 4.1078,
+      "step": 3403
+    },
+    {
+      "epoch": 0.03404,
+      "grad_norm": 0.8683725321318734,
+      "learning_rate": 0.003,
+      "loss": 4.1404,
+      "step": 3404
+    },
+    {
+      "epoch": 0.03405,
+      "grad_norm": 0.8700430489216084,
+      "learning_rate": 0.003,
+      "loss": 4.126,
+      "step": 3405
+    },
+    {
+      "epoch": 0.03406,
+      "grad_norm": 0.856851032889231,
+      "learning_rate": 0.003,
+      "loss": 4.1086,
+      "step": 3406
+    },
+    {
+      "epoch": 0.03407,
+      "grad_norm": 0.8583304198024593,
+      "learning_rate": 0.003,
+      "loss": 4.1254,
+      "step": 3407
+    },
+    {
+      "epoch": 0.03408,
+      "grad_norm": 0.8259652751248255,
+      "learning_rate": 0.003,
+      "loss": 4.1052,
+      "step": 3408
+    },
+    {
+      "epoch": 0.03409,
+      "grad_norm": 0.9359241584231073,
+      "learning_rate": 0.003,
+      "loss": 4.1182,
+      "step": 3409
+    },
+    {
+      "epoch": 0.0341,
+      "grad_norm": 0.938170801150728,
+      "learning_rate": 0.003,
+      "loss": 4.1147,
+      "step": 3410
+    },
+    {
+      "epoch": 0.03411,
+      "grad_norm": 0.9444575938733144,
+      "learning_rate": 0.003,
+      "loss": 4.1289,
+      "step": 3411
+    },
+    {
+      "epoch": 0.03412,
+      "grad_norm": 0.9224218450030172,
+      "learning_rate": 0.003,
+      "loss": 4.1473,
+      "step": 3412
+    },
+    {
+      "epoch": 0.03413,
+      "grad_norm": 0.9312506396494133,
+      "learning_rate": 0.003,
+      "loss": 4.1534,
+      "step": 3413
+    },
+    {
+      "epoch": 0.03414,
+      "grad_norm": 0.9761972676777384,
+      "learning_rate": 0.003,
+      "loss": 4.1424,
+      "step": 3414
+    },
+    {
+      "epoch": 0.03415,
+      "grad_norm": 1.189613033448552,
+      "learning_rate": 0.003,
+      "loss": 4.1381,
+      "step": 3415
+    },
+    {
+      "epoch": 0.03416,
+      "grad_norm": 0.8255201977611025,
+      "learning_rate": 0.003,
+      "loss": 4.1273,
+      "step": 3416
+    },
+    {
+      "epoch": 0.03417,
+      "grad_norm": 0.8947733285821016,
+      "learning_rate": 0.003,
+      "loss": 4.125,
+      "step": 3417
+    },
+    {
+      "epoch": 0.03418,
+      "grad_norm": 0.7984406024805614,
+      "learning_rate": 0.003,
+      "loss": 4.0949,
+      "step": 3418
+    },
+    {
+      "epoch": 0.03419,
+      "grad_norm": 0.6630379610145436,
+      "learning_rate": 0.003,
+      "loss": 4.1135,
+      "step": 3419
+    },
+    {
+      "epoch": 0.0342,
+      "grad_norm": 0.5975770128227218,
+      "learning_rate": 0.003,
+      "loss": 4.12,
+      "step": 3420
+    },
+    {
+      "epoch": 0.03421,
+      "grad_norm": 0.6059481665025913,
+      "learning_rate": 0.003,
+      "loss": 4.1103,
+      "step": 3421
+    },
+    {
+      "epoch": 0.03422,
+      "grad_norm": 0.6007015602316165,
+      "learning_rate": 0.003,
+      "loss": 4.1246,
+      "step": 3422
+    },
+    {
+      "epoch": 0.03423,
+      "grad_norm": 0.6213374488193623,
+      "learning_rate": 0.003,
+      "loss": 4.1042,
+      "step": 3423
+    },
+    {
+      "epoch": 0.03424,
+      "grad_norm": 0.7357379283476251,
+      "learning_rate": 0.003,
+      "loss": 4.1285,
+      "step": 3424
+    },
+    {
+      "epoch": 0.03425,
+      "grad_norm": 1.1388627740722068,
+      "learning_rate": 0.003,
+      "loss": 4.1027,
+      "step": 3425
+    },
+    {
+      "epoch": 0.03426,
+      "grad_norm": 1.2676165376349322,
+      "learning_rate": 0.003,
+      "loss": 4.1396,
+      "step": 3426
+    },
+    {
+      "epoch": 0.03427,
+      "grad_norm": 0.8591095580169765,
+      "learning_rate": 0.003,
+      "loss": 4.0962,
+      "step": 3427
+    },
+    {
+      "epoch": 0.03428,
+      "grad_norm": 0.7637078879713047,
+      "learning_rate": 0.003,
+      "loss": 4.096,
+      "step": 3428
+    },
+    {
+      "epoch": 0.03429,
+      "grad_norm": 0.7598510773275634,
+      "learning_rate": 0.003,
+      "loss": 4.1094,
+      "step": 3429
+    },
+    {
+      "epoch": 0.0343,
+      "grad_norm": 0.7189854251191673,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 3430
+    },
+    {
+      "epoch": 0.03431,
+      "grad_norm": 0.6960848938203806,
+      "learning_rate": 0.003,
+      "loss": 4.0955,
+      "step": 3431
+    },
+    {
+      "epoch": 0.03432,
+      "grad_norm": 0.7905143093567869,
+      "learning_rate": 0.003,
+      "loss": 4.1069,
+      "step": 3432
+    },
+    {
+      "epoch": 0.03433,
+      "grad_norm": 0.6763604081182782,
+      "learning_rate": 0.003,
+      "loss": 4.0757,
+      "step": 3433
+    },
+    {
+      "epoch": 0.03434,
+      "grad_norm": 0.6584657241744457,
+      "learning_rate": 0.003,
+      "loss": 4.098,
+      "step": 3434
+    },
+    {
+      "epoch": 0.03435,
+      "grad_norm": 0.6146074495577768,
+      "learning_rate": 0.003,
+      "loss": 4.0797,
+      "step": 3435
+    },
+    {
+      "epoch": 0.03436,
+      "grad_norm": 0.6821260061437763,
+      "learning_rate": 0.003,
+      "loss": 4.1092,
+      "step": 3436
+    },
+    {
+      "epoch": 0.03437,
+      "grad_norm": 0.7002392953868479,
+      "learning_rate": 0.003,
+      "loss": 4.0975,
+      "step": 3437
+    },
+    {
+      "epoch": 0.03438,
+      "grad_norm": 0.6865303492632201,
+      "learning_rate": 0.003,
+      "loss": 4.1125,
+      "step": 3438
+    },
+    {
+      "epoch": 0.03439,
+      "grad_norm": 0.6862765139756455,
+      "learning_rate": 0.003,
+      "loss": 4.109,
+      "step": 3439
+    },
+    {
+      "epoch": 0.0344,
+      "grad_norm": 0.7374953588444623,
+      "learning_rate": 0.003,
+      "loss": 4.085,
+      "step": 3440
+    },
+    {
+      "epoch": 0.03441,
+      "grad_norm": 0.8034314810869642,
+      "learning_rate": 0.003,
+      "loss": 4.1097,
+      "step": 3441
+    },
+    {
+      "epoch": 0.03442,
+      "grad_norm": 0.9713109980816932,
+      "learning_rate": 0.003,
+      "loss": 4.0808,
+      "step": 3442
+    },
+    {
+      "epoch": 0.03443,
+      "grad_norm": 1.2417830843481585,
+      "learning_rate": 0.003,
+      "loss": 4.1193,
+      "step": 3443
+    },
+    {
+      "epoch": 0.03444,
+      "grad_norm": 0.9221286248093234,
+      "learning_rate": 0.003,
+      "loss": 4.1273,
+      "step": 3444
+    },
+    {
+      "epoch": 0.03445,
+      "grad_norm": 0.95846171622353,
+      "learning_rate": 0.003,
+      "loss": 4.0932,
+      "step": 3445
+    },
+    {
+      "epoch": 0.03446,
+      "grad_norm": 1.0528525400170592,
+      "learning_rate": 0.003,
+      "loss": 4.0839,
+      "step": 3446
+    },
+    {
+      "epoch": 0.03447,
+      "grad_norm": 1.0620743717942975,
+      "learning_rate": 0.003,
+      "loss": 4.1319,
+      "step": 3447
+    },
+    {
+      "epoch": 0.03448,
+      "grad_norm": 1.0787590252225165,
+      "learning_rate": 0.003,
+      "loss": 4.1092,
+      "step": 3448
+    },
+    {
+      "epoch": 0.03449,
+      "grad_norm": 1.078404883653542,
+      "learning_rate": 0.003,
+      "loss": 4.0986,
+      "step": 3449
+    },
+    {
+      "epoch": 0.0345,
+      "grad_norm": 1.1540054715604005,
+      "learning_rate": 0.003,
+      "loss": 4.1381,
+      "step": 3450
+    },
+    {
+      "epoch": 0.03451,
+      "grad_norm": 0.8053901369440304,
+      "learning_rate": 0.003,
+      "loss": 4.102,
+      "step": 3451
+    },
+    {
+      "epoch": 0.03452,
+      "grad_norm": 0.8242631664830717,
+      "learning_rate": 0.003,
+      "loss": 4.1318,
+      "step": 3452
+    },
+    {
+      "epoch": 0.03453,
+      "grad_norm": 0.8626897904731716,
+      "learning_rate": 0.003,
+      "loss": 4.1077,
+      "step": 3453
+    },
+    {
+      "epoch": 0.03454,
+      "grad_norm": 0.8237109468250721,
+      "learning_rate": 0.003,
+      "loss": 4.0895,
+      "step": 3454
+    },
+    {
+      "epoch": 0.03455,
+      "grad_norm": 0.804980006239357,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 3455
+    },
+    {
+      "epoch": 0.03456,
+      "grad_norm": 0.729280374546535,
+      "learning_rate": 0.003,
+      "loss": 4.1054,
+      "step": 3456
+    },
+    {
+      "epoch": 0.03457,
+      "grad_norm": 0.7776253504611971,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 3457
+    },
+    {
+      "epoch": 0.03458,
+      "grad_norm": 0.7417498419116878,
+      "learning_rate": 0.003,
+      "loss": 4.1078,
+      "step": 3458
+    },
+    {
+      "epoch": 0.03459,
+      "grad_norm": 0.6919191119122285,
+      "learning_rate": 0.003,
+      "loss": 4.1315,
+      "step": 3459
+    },
+    {
+      "epoch": 0.0346,
+      "grad_norm": 0.6964165299460008,
+      "learning_rate": 0.003,
+      "loss": 4.1312,
+      "step": 3460
+    },
+    {
+      "epoch": 0.03461,
+      "grad_norm": 0.715480299476076,
+      "learning_rate": 0.003,
+      "loss": 4.109,
+      "step": 3461
+    },
+    {
+      "epoch": 0.03462,
+      "grad_norm": 0.7536303091133582,
+      "learning_rate": 0.003,
+      "loss": 4.1045,
+      "step": 3462
+    },
+    {
+      "epoch": 0.03463,
+      "grad_norm": 0.7082893489121429,
+      "learning_rate": 0.003,
+      "loss": 4.087,
+      "step": 3463
+    },
+    {
+      "epoch": 0.03464,
+      "grad_norm": 0.6417430899292896,
+      "learning_rate": 0.003,
+      "loss": 4.099,
+      "step": 3464
+    },
+    {
+      "epoch": 0.03465,
+      "grad_norm": 0.6337345648035622,
+      "learning_rate": 0.003,
+      "loss": 4.1065,
+      "step": 3465
+    },
+    {
+      "epoch": 0.03466,
+      "grad_norm": 0.6853326694383307,
+      "learning_rate": 0.003,
+      "loss": 4.107,
+      "step": 3466
+    },
+    {
+      "epoch": 0.03467,
+      "grad_norm": 0.6969053686300763,
+      "learning_rate": 0.003,
+      "loss": 4.0908,
+      "step": 3467
+    },
+    {
+      "epoch": 0.03468,
+      "grad_norm": 0.7399108221307881,
+      "learning_rate": 0.003,
+      "loss": 4.1065,
+      "step": 3468
+    },
+    {
+      "epoch": 0.03469,
+      "grad_norm": 0.8787869181598735,
+      "learning_rate": 0.003,
+      "loss": 4.1074,
+      "step": 3469
+    },
+    {
+      "epoch": 0.0347,
+      "grad_norm": 1.0101947797716855,
+      "learning_rate": 0.003,
+      "loss": 4.1289,
+      "step": 3470
+    },
+    {
+      "epoch": 0.03471,
+      "grad_norm": 1.1334674964796345,
+      "learning_rate": 0.003,
+      "loss": 4.1276,
+      "step": 3471
+    },
+    {
+      "epoch": 0.03472,
+      "grad_norm": 0.8368294781332597,
+      "learning_rate": 0.003,
+      "loss": 4.1132,
+      "step": 3472
+    },
+    {
+      "epoch": 0.03473,
+      "grad_norm": 0.8757089713498055,
+      "learning_rate": 0.003,
+      "loss": 4.1133,
+      "step": 3473
+    },
+    {
+      "epoch": 0.03474,
+      "grad_norm": 0.8138215418551573,
+      "learning_rate": 0.003,
+      "loss": 4.1375,
+      "step": 3474
+    },
+    {
+      "epoch": 0.03475,
+      "grad_norm": 0.9122035240346946,
+      "learning_rate": 0.003,
+      "loss": 4.1102,
+      "step": 3475
+    },
+    {
+      "epoch": 0.03476,
+      "grad_norm": 1.0107344329113028,
+      "learning_rate": 0.003,
+      "loss": 4.1232,
+      "step": 3476
+    },
+    {
+      "epoch": 0.03477,
+      "grad_norm": 1.0671405774179539,
+      "learning_rate": 0.003,
+      "loss": 4.1215,
+      "step": 3477
+    },
+    {
+      "epoch": 0.03478,
+      "grad_norm": 1.011473839771263,
+      "learning_rate": 0.003,
+      "loss": 4.128,
+      "step": 3478
+    },
+    {
+      "epoch": 0.03479,
+      "grad_norm": 1.0143839143452251,
+      "learning_rate": 0.003,
+      "loss": 4.1035,
+      "step": 3479
+    },
+    {
+      "epoch": 0.0348,
+      "grad_norm": 1.01956672356136,
+      "learning_rate": 0.003,
+      "loss": 4.1097,
+      "step": 3480
+    },
+    {
+      "epoch": 0.03481,
+      "grad_norm": 0.8544300973866115,
+      "learning_rate": 0.003,
+      "loss": 4.1174,
+      "step": 3481
+    },
+    {
+      "epoch": 0.03482,
+      "grad_norm": 1.0322476593810674,
+      "learning_rate": 0.003,
+      "loss": 4.133,
+      "step": 3482
+    },
+    {
+      "epoch": 0.03483,
+      "grad_norm": 1.1510621333444928,
+      "learning_rate": 0.003,
+      "loss": 4.1164,
+      "step": 3483
+    },
+    {
+      "epoch": 0.03484,
+      "grad_norm": 0.929657295041821,
+      "learning_rate": 0.003,
+      "loss": 4.1548,
+      "step": 3484
+    },
+    {
+      "epoch": 0.03485,
+      "grad_norm": 0.9553453027677797,
+      "learning_rate": 0.003,
+      "loss": 4.146,
+      "step": 3485
+    },
+    {
+      "epoch": 0.03486,
+      "grad_norm": 0.9940865088692804,
+      "learning_rate": 0.003,
+      "loss": 4.1221,
+      "step": 3486
+    },
+    {
+      "epoch": 0.03487,
+      "grad_norm": 0.9250098589178641,
+      "learning_rate": 0.003,
+      "loss": 4.1107,
+      "step": 3487
+    },
+    {
+      "epoch": 0.03488,
+      "grad_norm": 0.9460673526858474,
+      "learning_rate": 0.003,
+      "loss": 4.111,
+      "step": 3488
+    },
+    {
+      "epoch": 0.03489,
+      "grad_norm": 0.9687600332195162,
+      "learning_rate": 0.003,
+      "loss": 4.1276,
+      "step": 3489
+    },
+    {
+      "epoch": 0.0349,
+      "grad_norm": 1.0784208838544622,
+      "learning_rate": 0.003,
+      "loss": 4.1277,
+      "step": 3490
+    },
+    {
+      "epoch": 0.03491,
+      "grad_norm": 0.9346645940080963,
+      "learning_rate": 0.003,
+      "loss": 4.1409,
+      "step": 3491
+    },
+    {
+      "epoch": 0.03492,
+      "grad_norm": 0.8257240466605443,
+      "learning_rate": 0.003,
+      "loss": 4.1242,
+      "step": 3492
+    },
+    {
+      "epoch": 0.03493,
+      "grad_norm": 0.6651031888705147,
+      "learning_rate": 0.003,
+      "loss": 4.1079,
+      "step": 3493
+    },
+    {
+      "epoch": 0.03494,
+      "grad_norm": 0.6641292093348049,
+      "learning_rate": 0.003,
+      "loss": 4.1103,
+      "step": 3494
+    },
+    {
+      "epoch": 0.03495,
+      "grad_norm": 0.7091189628573692,
+      "learning_rate": 0.003,
+      "loss": 4.1192,
+      "step": 3495
+    },
+    {
+      "epoch": 0.03496,
+      "grad_norm": 0.6860863869815901,
+      "learning_rate": 0.003,
+      "loss": 4.0904,
+      "step": 3496
+    },
+    {
+      "epoch": 0.03497,
+      "grad_norm": 0.6717227669489184,
+      "learning_rate": 0.003,
+      "loss": 4.1269,
+      "step": 3497
+    },
+    {
+      "epoch": 0.03498,
+      "grad_norm": 0.6858788932767997,
+      "learning_rate": 0.003,
+      "loss": 4.098,
+      "step": 3498
+    },
+    {
+      "epoch": 0.03499,
+      "grad_norm": 0.6840439194259218,
+      "learning_rate": 0.003,
+      "loss": 4.1009,
+      "step": 3499
+    },
+    {
+      "epoch": 0.035,
+      "grad_norm": 0.7718701545173198,
+      "learning_rate": 0.003,
+      "loss": 4.1217,
+      "step": 3500
+    },
+    {
+      "epoch": 0.03501,
+      "grad_norm": 0.9467030386692938,
+      "learning_rate": 0.003,
+      "loss": 4.1179,
+      "step": 3501
+    },
+    {
+      "epoch": 0.03502,
+      "grad_norm": 1.1344054626766904,
+      "learning_rate": 0.003,
+      "loss": 4.1239,
+      "step": 3502
+    },
+    {
+      "epoch": 0.03503,
+      "grad_norm": 0.8227576423331606,
+      "learning_rate": 0.003,
+      "loss": 4.1319,
+      "step": 3503
+    },
+    {
+      "epoch": 0.03504,
+      "grad_norm": 0.7907161997976049,
+      "learning_rate": 0.003,
+      "loss": 4.103,
+      "step": 3504
+    },
+    {
+      "epoch": 0.03505,
+      "grad_norm": 0.7949755989311121,
+      "learning_rate": 0.003,
+      "loss": 4.1199,
+      "step": 3505
+    },
+    {
+      "epoch": 0.03506,
+      "grad_norm": 0.8242114597731752,
+      "learning_rate": 0.003,
+      "loss": 4.1307,
+      "step": 3506
+    },
+    {
+      "epoch": 0.03507,
+      "grad_norm": 0.827615657879298,
+      "learning_rate": 0.003,
+      "loss": 4.1076,
+      "step": 3507
+    },
+    {
+      "epoch": 0.03508,
+      "grad_norm": 0.8269261240893123,
+      "learning_rate": 0.003,
+      "loss": 4.0859,
+      "step": 3508
+    },
+    {
+      "epoch": 0.03509,
+      "grad_norm": 0.8828534433530547,
+      "learning_rate": 0.003,
+      "loss": 4.1022,
+      "step": 3509
+    },
+    {
+      "epoch": 0.0351,
+      "grad_norm": 1.0949297929244133,
+      "learning_rate": 0.003,
+      "loss": 4.0996,
+      "step": 3510
+    },
+    {
+      "epoch": 0.03511,
+      "grad_norm": 0.8474490719439868,
+      "learning_rate": 0.003,
+      "loss": 4.1094,
+      "step": 3511
+    },
+    {
+      "epoch": 0.03512,
+      "grad_norm": 0.6104860735283346,
+      "learning_rate": 0.003,
+      "loss": 4.0928,
+      "step": 3512
+    },
+    {
+      "epoch": 0.03513,
+      "grad_norm": 0.6050004310996162,
+      "learning_rate": 0.003,
+      "loss": 4.0934,
+      "step": 3513
+    },
+    {
+      "epoch": 0.03514,
+      "grad_norm": 0.6520473843771001,
+      "learning_rate": 0.003,
+      "loss": 4.1051,
+      "step": 3514
+    },
+    {
+      "epoch": 0.03515,
+      "grad_norm": 0.6055869327734182,
+      "learning_rate": 0.003,
+      "loss": 4.1134,
+      "step": 3515
+    },
+    {
+      "epoch": 0.03516,
+      "grad_norm": 0.6086175138721427,
+      "learning_rate": 0.003,
+      "loss": 4.136,
+      "step": 3516
+    },
+    {
+      "epoch": 0.03517,
+      "grad_norm": 0.6697942755212305,
+      "learning_rate": 0.003,
+      "loss": 4.0895,
+      "step": 3517
+    },
+    {
+      "epoch": 0.03518,
+      "grad_norm": 0.7287441549864627,
+      "learning_rate": 0.003,
+      "loss": 4.0785,
+      "step": 3518
+    },
+    {
+      "epoch": 0.03519,
+      "grad_norm": 0.6795543307624995,
+      "learning_rate": 0.003,
+      "loss": 4.1231,
+      "step": 3519
+    },
+    {
+      "epoch": 0.0352,
+      "grad_norm": 0.5583215165075127,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 3520
+    },
+    {
+      "epoch": 0.03521,
+      "grad_norm": 0.6145999630971761,
+      "learning_rate": 0.003,
+      "loss": 4.0939,
+      "step": 3521
+    },
+    {
+      "epoch": 0.03522,
+      "grad_norm": 0.6235299979345595,
+      "learning_rate": 0.003,
+      "loss": 4.0958,
+      "step": 3522
+    },
+    {
+      "epoch": 0.03523,
+      "grad_norm": 0.6982274043250706,
+      "learning_rate": 0.003,
+      "loss": 4.0634,
+      "step": 3523
+    },
+    {
+      "epoch": 0.03524,
+      "grad_norm": 0.8135221205032699,
+      "learning_rate": 0.003,
+      "loss": 4.1134,
+      "step": 3524
+    },
+    {
+      "epoch": 0.03525,
+      "grad_norm": 0.9550932431193345,
+      "learning_rate": 0.003,
+      "loss": 4.0866,
+      "step": 3525
+    },
+    {
+      "epoch": 0.03526,
+      "grad_norm": 0.9769559861681798,
+      "learning_rate": 0.003,
+      "loss": 4.0996,
+      "step": 3526
+    },
+    {
+      "epoch": 0.03527,
+      "grad_norm": 0.9916459776325028,
+      "learning_rate": 0.003,
+      "loss": 4.1281,
+      "step": 3527
+    },
+    {
+      "epoch": 0.03528,
+      "grad_norm": 1.161858614363555,
+      "learning_rate": 0.003,
+      "loss": 4.1182,
+      "step": 3528
+    },
+    {
+      "epoch": 0.03529,
+      "grad_norm": 0.8840893570988297,
+      "learning_rate": 0.003,
+      "loss": 4.1083,
+      "step": 3529
+    },
+    {
+      "epoch": 0.0353,
+      "grad_norm": 0.9045833698319836,
+      "learning_rate": 0.003,
+      "loss": 4.1059,
+      "step": 3530
+    },
+    {
+      "epoch": 0.03531,
+      "grad_norm": 1.0824064864090153,
+      "learning_rate": 0.003,
+      "loss": 4.1235,
+      "step": 3531
+    },
+    {
+      "epoch": 0.03532,
+      "grad_norm": 1.04041676924932,
+      "learning_rate": 0.003,
+      "loss": 4.1238,
+      "step": 3532
+    },
+    {
+      "epoch": 0.03533,
+      "grad_norm": 1.0971254119119307,
+      "learning_rate": 0.003,
+      "loss": 4.1274,
+      "step": 3533
+    },
+    {
+      "epoch": 0.03534,
+      "grad_norm": 0.9892749396028874,
+      "learning_rate": 0.003,
+      "loss": 4.1235,
+      "step": 3534
+    },
+    {
+      "epoch": 0.03535,
+      "grad_norm": 0.8985518501036314,
+      "learning_rate": 0.003,
+      "loss": 4.0881,
+      "step": 3535
+    },
+    {
+      "epoch": 0.03536,
+      "grad_norm": 0.7993862708923696,
+      "learning_rate": 0.003,
+      "loss": 4.0885,
+      "step": 3536
+    },
+    {
+      "epoch": 0.03537,
+      "grad_norm": 0.8735445800029786,
+      "learning_rate": 0.003,
+      "loss": 4.1018,
+      "step": 3537
+    },
+    {
+      "epoch": 0.03538,
+      "grad_norm": 0.9507196424596396,
+      "learning_rate": 0.003,
+      "loss": 4.1074,
+      "step": 3538
+    },
+    {
+      "epoch": 0.03539,
+      "grad_norm": 0.9320860917835899,
+      "learning_rate": 0.003,
+      "loss": 4.0979,
+      "step": 3539
+    },
+    {
+      "epoch": 0.0354,
+      "grad_norm": 1.0666914195552315,
+      "learning_rate": 0.003,
+      "loss": 4.1405,
+      "step": 3540
+    },
+    {
+      "epoch": 0.03541,
+      "grad_norm": 0.9666553633993341,
+      "learning_rate": 0.003,
+      "loss": 4.1236,
+      "step": 3541
+    },
+    {
+      "epoch": 0.03542,
+      "grad_norm": 1.075546280507871,
+      "learning_rate": 0.003,
+      "loss": 4.1084,
+      "step": 3542
+    },
+    {
+      "epoch": 0.03543,
+      "grad_norm": 0.8581109533371994,
+      "learning_rate": 0.003,
+      "loss": 4.1149,
+      "step": 3543
+    },
+    {
+      "epoch": 0.03544,
+      "grad_norm": 0.8504542868770893,
+      "learning_rate": 0.003,
+      "loss": 4.1221,
+      "step": 3544
+    },
+    {
+      "epoch": 0.03545,
+      "grad_norm": 0.8221740518533669,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 3545
+    },
+    {
+      "epoch": 0.03546,
+      "grad_norm": 0.856238149878472,
+      "learning_rate": 0.003,
+      "loss": 4.1099,
+      "step": 3546
+    },
+    {
+      "epoch": 0.03547,
+      "grad_norm": 0.8136205837732846,
+      "learning_rate": 0.003,
+      "loss": 4.1181,
+      "step": 3547
+    },
+    {
+      "epoch": 0.03548,
+      "grad_norm": 0.9120415275886503,
+      "learning_rate": 0.003,
+      "loss": 4.1234,
+      "step": 3548
+    },
+    {
+      "epoch": 0.03549,
+      "grad_norm": 0.8741924577842357,
+      "learning_rate": 0.003,
+      "loss": 4.1189,
+      "step": 3549
+    },
+    {
+      "epoch": 0.0355,
+      "grad_norm": 0.964690101454288,
+      "learning_rate": 0.003,
+      "loss": 4.0735,
+      "step": 3550
+    },
+    {
+      "epoch": 0.03551,
+      "grad_norm": 1.1078408326241436,
+      "learning_rate": 0.003,
+      "loss": 4.1406,
+      "step": 3551
+    },
+    {
+      "epoch": 0.03552,
+      "grad_norm": 0.8276304700566389,
+      "learning_rate": 0.003,
+      "loss": 4.0932,
+      "step": 3552
+    },
+    {
+      "epoch": 0.03553,
+      "grad_norm": 0.6988104369223903,
+      "learning_rate": 0.003,
+      "loss": 4.1033,
+      "step": 3553
+    },
+    {
+      "epoch": 0.03554,
+      "grad_norm": 0.8099473741524069,
+      "learning_rate": 0.003,
+      "loss": 4.1096,
+      "step": 3554
+    },
+    {
+      "epoch": 0.03555,
+      "grad_norm": 0.7657626977957569,
+      "learning_rate": 0.003,
+      "loss": 4.1203,
+      "step": 3555
+    },
+    {
+      "epoch": 0.03556,
+      "grad_norm": 0.6884161752719029,
+      "learning_rate": 0.003,
+      "loss": 4.1008,
+      "step": 3556
+    },
+    {
+      "epoch": 0.03557,
+      "grad_norm": 0.6257341319138348,
+      "learning_rate": 0.003,
+      "loss": 4.0964,
+      "step": 3557
+    },
+    {
+      "epoch": 0.03558,
+      "grad_norm": 0.6075327774193832,
+      "learning_rate": 0.003,
+      "loss": 4.1258,
+      "step": 3558
+    },
+    {
+      "epoch": 0.03559,
+      "grad_norm": 0.7848274408146922,
+      "learning_rate": 0.003,
+      "loss": 4.1122,
+      "step": 3559
+    },
+    {
+      "epoch": 0.0356,
+      "grad_norm": 1.0374578386942082,
+      "learning_rate": 0.003,
+      "loss": 4.1241,
+      "step": 3560
+    },
+    {
+      "epoch": 0.03561,
+      "grad_norm": 1.0758010083557943,
+      "learning_rate": 0.003,
+      "loss": 4.1508,
+      "step": 3561
+    },
+    {
+      "epoch": 0.03562,
+      "grad_norm": 0.7322873944575906,
+      "learning_rate": 0.003,
+      "loss": 4.1072,
+      "step": 3562
+    },
+    {
+      "epoch": 0.03563,
+      "grad_norm": 0.7971284325210378,
+      "learning_rate": 0.003,
+      "loss": 4.1392,
+      "step": 3563
+    },
+    {
+      "epoch": 0.03564,
+      "grad_norm": 1.0246051569009555,
+      "learning_rate": 0.003,
+      "loss": 4.1233,
+      "step": 3564
+    },
+    {
+      "epoch": 0.03565,
+      "grad_norm": 1.2267527206427153,
+      "learning_rate": 0.003,
+      "loss": 4.1252,
+      "step": 3565
+    },
+    {
+      "epoch": 0.03566,
+      "grad_norm": 0.8984275051065349,
+      "learning_rate": 0.003,
+      "loss": 4.1198,
+      "step": 3566
+    },
+    {
+      "epoch": 0.03567,
+      "grad_norm": 0.9814381465438691,
+      "learning_rate": 0.003,
+      "loss": 4.1164,
+      "step": 3567
+    },
+    {
+      "epoch": 0.03568,
+      "grad_norm": 0.862338747629593,
+      "learning_rate": 0.003,
+      "loss": 4.1125,
+      "step": 3568
+    },
+    {
+      "epoch": 0.03569,
+      "grad_norm": 0.8507776080373626,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 3569
+    },
+    {
+      "epoch": 0.0357,
+      "grad_norm": 0.8450119567501945,
+      "learning_rate": 0.003,
+      "loss": 4.108,
+      "step": 3570
+    },
+    {
+      "epoch": 0.03571,
+      "grad_norm": 0.8474288365898862,
+      "learning_rate": 0.003,
+      "loss": 4.1151,
+      "step": 3571
+    },
+    {
+      "epoch": 0.03572,
+      "grad_norm": 0.8860990170054822,
+      "learning_rate": 0.003,
+      "loss": 4.1365,
+      "step": 3572
+    },
+    {
+      "epoch": 0.03573,
+      "grad_norm": 0.9378112912832053,
+      "learning_rate": 0.003,
+      "loss": 4.1084,
+      "step": 3573
+    },
+    {
+      "epoch": 0.03574,
+      "grad_norm": 0.7793011201012979,
+      "learning_rate": 0.003,
+      "loss": 4.1265,
+      "step": 3574
+    },
+    {
+      "epoch": 0.03575,
+      "grad_norm": 0.6347551630020188,
+      "learning_rate": 0.003,
+      "loss": 4.099,
+      "step": 3575
+    },
+    {
+      "epoch": 0.03576,
+      "grad_norm": 0.6273033455713914,
+      "learning_rate": 0.003,
+      "loss": 4.0843,
+      "step": 3576
+    },
+    {
+      "epoch": 0.03577,
+      "grad_norm": 0.5948625546551696,
+      "learning_rate": 0.003,
+      "loss": 4.1162,
+      "step": 3577
+    },
+    {
+      "epoch": 0.03578,
+      "grad_norm": 0.5805191268794814,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 3578
+    },
+    {
+      "epoch": 0.03579,
+      "grad_norm": 0.5381920460033986,
+      "learning_rate": 0.003,
+      "loss": 4.1175,
+      "step": 3579
+    },
+    {
+      "epoch": 0.0358,
+      "grad_norm": 0.7081167212858254,
+      "learning_rate": 0.003,
+      "loss": 4.0774,
+      "step": 3580
+    },
+    {
+      "epoch": 0.03581,
+      "grad_norm": 0.7386340619770309,
+      "learning_rate": 0.003,
+      "loss": 4.0886,
+      "step": 3581
+    },
+    {
+      "epoch": 0.03582,
+      "grad_norm": 0.7587714591488295,
+      "learning_rate": 0.003,
+      "loss": 4.0906,
+      "step": 3582
+    },
+    {
+      "epoch": 0.03583,
+      "grad_norm": 0.7113793632761408,
+      "learning_rate": 0.003,
+      "loss": 4.0903,
+      "step": 3583
+    },
+    {
+      "epoch": 0.03584,
+      "grad_norm": 0.7188054852978065,
+      "learning_rate": 0.003,
+      "loss": 4.0791,
+      "step": 3584
+    },
+    {
+      "epoch": 0.03585,
+      "grad_norm": 0.8853261674987007,
+      "learning_rate": 0.003,
+      "loss": 4.0815,
+      "step": 3585
+    },
+    {
+      "epoch": 0.03586,
+      "grad_norm": 0.92627880930667,
+      "learning_rate": 0.003,
+      "loss": 4.1062,
+      "step": 3586
+    },
+    {
+      "epoch": 0.03587,
+      "grad_norm": 0.9331834952096867,
+      "learning_rate": 0.003,
+      "loss": 4.105,
+      "step": 3587
+    },
+    {
+      "epoch": 0.03588,
+      "grad_norm": 1.056355930051097,
+      "learning_rate": 0.003,
+      "loss": 4.1099,
+      "step": 3588
+    },
+    {
+      "epoch": 0.03589,
+      "grad_norm": 0.8554656185849556,
+      "learning_rate": 0.003,
+      "loss": 4.1212,
+      "step": 3589
+    },
+    {
+      "epoch": 0.0359,
+      "grad_norm": 0.7792232208377492,
+      "learning_rate": 0.003,
+      "loss": 4.0836,
+      "step": 3590
+    },
+    {
+      "epoch": 0.03591,
+      "grad_norm": 0.9342912310042636,
+      "learning_rate": 0.003,
+      "loss": 4.1217,
+      "step": 3591
+    },
+    {
+      "epoch": 0.03592,
+      "grad_norm": 1.0503229079193128,
+      "learning_rate": 0.003,
+      "loss": 4.1126,
+      "step": 3592
+    },
+    {
+      "epoch": 0.03593,
+      "grad_norm": 0.984608248076784,
+      "learning_rate": 0.003,
+      "loss": 4.1077,
+      "step": 3593
+    },
+    {
+      "epoch": 0.03594,
+      "grad_norm": 0.9992015208659966,
+      "learning_rate": 0.003,
+      "loss": 4.1142,
+      "step": 3594
+    },
+    {
+      "epoch": 0.03595,
+      "grad_norm": 0.9486124739849024,
+      "learning_rate": 0.003,
+      "loss": 4.1062,
+      "step": 3595
+    },
+    {
+      "epoch": 0.03596,
+      "grad_norm": 0.8044450679529417,
+      "learning_rate": 0.003,
+      "loss": 4.1375,
+      "step": 3596
+    },
+    {
+      "epoch": 0.03597,
+      "grad_norm": 0.7529472117586564,
+      "learning_rate": 0.003,
+      "loss": 4.1092,
+      "step": 3597
+    },
+    {
+      "epoch": 0.03598,
+      "grad_norm": 0.8016519620578548,
+      "learning_rate": 0.003,
+      "loss": 4.1136,
+      "step": 3598
+    },
+    {
+      "epoch": 0.03599,
+      "grad_norm": 0.8552258797660491,
+      "learning_rate": 0.003,
+      "loss": 4.1107,
+      "step": 3599
+    },
+    {
+      "epoch": 0.036,
+      "grad_norm": 1.122743201613801,
+      "learning_rate": 0.003,
+      "loss": 4.1422,
+      "step": 3600
+    },
+    {
+      "epoch": 0.03601,
+      "grad_norm": 0.9558780975359691,
+      "learning_rate": 0.003,
+      "loss": 4.1108,
+      "step": 3601
+    },
+    {
+      "epoch": 0.03602,
+      "grad_norm": 1.0399279121526195,
+      "learning_rate": 0.003,
+      "loss": 4.1008,
+      "step": 3602
+    },
+    {
+      "epoch": 0.03603,
+      "grad_norm": 1.1551062763334126,
+      "learning_rate": 0.003,
+      "loss": 4.0958,
+      "step": 3603
+    },
+    {
+      "epoch": 0.03604,
+      "grad_norm": 1.0548506296076239,
+      "learning_rate": 0.003,
+      "loss": 4.1313,
+      "step": 3604
+    },
+    {
+      "epoch": 0.03605,
+      "grad_norm": 1.001983402921064,
+      "learning_rate": 0.003,
+      "loss": 4.1215,
+      "step": 3605
+    },
+    {
+      "epoch": 0.03606,
+      "grad_norm": 1.0312319108923618,
+      "learning_rate": 0.003,
+      "loss": 4.1116,
+      "step": 3606
+    },
+    {
+      "epoch": 0.03607,
+      "grad_norm": 0.8219603208628181,
+      "learning_rate": 0.003,
+      "loss": 4.1019,
+      "step": 3607
+    },
+    {
+      "epoch": 0.03608,
+      "grad_norm": 0.6498677677486123,
+      "learning_rate": 0.003,
+      "loss": 4.0969,
+      "step": 3608
+    },
+    {
+      "epoch": 0.03609,
+      "grad_norm": 0.7278852617397139,
+      "learning_rate": 0.003,
+      "loss": 4.1257,
+      "step": 3609
+    },
+    {
+      "epoch": 0.0361,
+      "grad_norm": 0.6666501580008998,
+      "learning_rate": 0.003,
+      "loss": 4.126,
+      "step": 3610
+    },
+    {
+      "epoch": 0.03611,
+      "grad_norm": 0.6972920943122692,
+      "learning_rate": 0.003,
+      "loss": 4.0758,
+      "step": 3611
+    },
+    {
+      "epoch": 0.03612,
+      "grad_norm": 0.8293279199659314,
+      "learning_rate": 0.003,
+      "loss": 4.0764,
+      "step": 3612
+    },
+    {
+      "epoch": 0.03613,
+      "grad_norm": 0.8280160598898497,
+      "learning_rate": 0.003,
+      "loss": 4.1168,
+      "step": 3613
+    },
+    {
+      "epoch": 0.03614,
+      "grad_norm": 0.8586511775157879,
+      "learning_rate": 0.003,
+      "loss": 4.0947,
+      "step": 3614
+    },
+    {
+      "epoch": 0.03615,
+      "grad_norm": 0.8273149005356012,
+      "learning_rate": 0.003,
+      "loss": 4.1065,
+      "step": 3615
+    },
+    {
+      "epoch": 0.03616,
+      "grad_norm": 0.7395998497715127,
+      "learning_rate": 0.003,
+      "loss": 4.1143,
+      "step": 3616
+    },
+    {
+      "epoch": 0.03617,
+      "grad_norm": 0.6904839766505554,
+      "learning_rate": 0.003,
+      "loss": 4.1067,
+      "step": 3617
+    },
+    {
+      "epoch": 0.03618,
+      "grad_norm": 0.7602021438934125,
+      "learning_rate": 0.003,
+      "loss": 4.0988,
+      "step": 3618
+    },
+    {
+      "epoch": 0.03619,
+      "grad_norm": 0.9970287144190367,
+      "learning_rate": 0.003,
+      "loss": 4.1035,
+      "step": 3619
+    },
+    {
+      "epoch": 0.0362,
+      "grad_norm": 1.0227512354316275,
+      "learning_rate": 0.003,
+      "loss": 4.1153,
+      "step": 3620
+    },
+    {
+      "epoch": 0.03621,
+      "grad_norm": 0.8480434130474432,
+      "learning_rate": 0.003,
+      "loss": 4.112,
+      "step": 3621
+    },
+    {
+      "epoch": 0.03622,
+      "grad_norm": 0.9271509010196743,
+      "learning_rate": 0.003,
+      "loss": 4.0842,
+      "step": 3622
+    },
+    {
+      "epoch": 0.03623,
+      "grad_norm": 1.022948927798352,
+      "learning_rate": 0.003,
+      "loss": 4.0792,
+      "step": 3623
+    },
+    {
+      "epoch": 0.03624,
+      "grad_norm": 0.9265641312425992,
+      "learning_rate": 0.003,
+      "loss": 4.0852,
+      "step": 3624
+    },
+    {
+      "epoch": 0.03625,
+      "grad_norm": 0.7813804518175715,
+      "learning_rate": 0.003,
+      "loss": 4.1199,
+      "step": 3625
+    },
+    {
+      "epoch": 0.03626,
+      "grad_norm": 0.7288040056584648,
+      "learning_rate": 0.003,
+      "loss": 4.0939,
+      "step": 3626
+    },
+    {
+      "epoch": 0.03627,
+      "grad_norm": 0.7898367328534155,
+      "learning_rate": 0.003,
+      "loss": 4.0953,
+      "step": 3627
+    },
+    {
+      "epoch": 0.03628,
+      "grad_norm": 0.7391500282099692,
+      "learning_rate": 0.003,
+      "loss": 4.1039,
+      "step": 3628
+    },
+    {
+      "epoch": 0.03629,
+      "grad_norm": 0.7244163565447427,
+      "learning_rate": 0.003,
+      "loss": 4.1225,
+      "step": 3629
+    },
+    {
+      "epoch": 0.0363,
+      "grad_norm": 0.8351668360599707,
+      "learning_rate": 0.003,
+      "loss": 4.108,
+      "step": 3630
+    },
+    {
+      "epoch": 0.03631,
+      "grad_norm": 1.1144221832925818,
+      "learning_rate": 0.003,
+      "loss": 4.1113,
+      "step": 3631
+    },
+    {
+      "epoch": 0.03632,
+      "grad_norm": 0.9360712274389132,
+      "learning_rate": 0.003,
+      "loss": 4.1011,
+      "step": 3632
+    },
+    {
+      "epoch": 0.03633,
+      "grad_norm": 0.9876936083254465,
+      "learning_rate": 0.003,
+      "loss": 4.1331,
+      "step": 3633
+    },
+    {
+      "epoch": 0.03634,
+      "grad_norm": 0.955681965536761,
+      "learning_rate": 0.003,
+      "loss": 4.1088,
+      "step": 3634
+    },
+    {
+      "epoch": 0.03635,
+      "grad_norm": 0.9023651977998233,
+      "learning_rate": 0.003,
+      "loss": 4.0924,
+      "step": 3635
+    },
+    {
+      "epoch": 0.03636,
+      "grad_norm": 0.8274154329119018,
+      "learning_rate": 0.003,
+      "loss": 4.1097,
+      "step": 3636
+    },
+    {
+      "epoch": 0.03637,
+      "grad_norm": 0.9898592340376765,
+      "learning_rate": 0.003,
+      "loss": 4.1049,
+      "step": 3637
+    },
+    {
+      "epoch": 0.03638,
+      "grad_norm": 0.961405507097599,
+      "learning_rate": 0.003,
+      "loss": 4.1193,
+      "step": 3638
+    },
+    {
+      "epoch": 0.03639,
+      "grad_norm": 1.0204413055029042,
+      "learning_rate": 0.003,
+      "loss": 4.145,
+      "step": 3639
+    },
+    {
+      "epoch": 0.0364,
+      "grad_norm": 0.9674075028743515,
+      "learning_rate": 0.003,
+      "loss": 4.1379,
+      "step": 3640
+    },
+    {
+      "epoch": 0.03641,
+      "grad_norm": 1.0768346328712466,
+      "learning_rate": 0.003,
+      "loss": 4.125,
+      "step": 3641
+    },
+    {
+      "epoch": 0.03642,
+      "grad_norm": 0.9224514479840723,
+      "learning_rate": 0.003,
+      "loss": 4.1408,
+      "step": 3642
+    },
+    {
+      "epoch": 0.03643,
+      "grad_norm": 1.0336265072437074,
+      "learning_rate": 0.003,
+      "loss": 4.1059,
+      "step": 3643
+    },
+    {
+      "epoch": 0.03644,
+      "grad_norm": 0.8524724447399462,
+      "learning_rate": 0.003,
+      "loss": 4.1268,
+      "step": 3644
+    },
+    {
+      "epoch": 0.03645,
+      "grad_norm": 0.746873753702558,
+      "learning_rate": 0.003,
+      "loss": 4.0924,
+      "step": 3645
+    },
+    {
+      "epoch": 0.03646,
+      "grad_norm": 0.8800991852578832,
+      "learning_rate": 0.003,
+      "loss": 4.1164,
+      "step": 3646
+    },
+    {
+      "epoch": 0.03647,
+      "grad_norm": 0.9340999264767859,
+      "learning_rate": 0.003,
+      "loss": 4.1139,
+      "step": 3647
+    },
+    {
+      "epoch": 0.03648,
+      "grad_norm": 0.9312651206865153,
+      "learning_rate": 0.003,
+      "loss": 4.1023,
+      "step": 3648
+    },
+    {
+      "epoch": 0.03649,
+      "grad_norm": 0.9165122392124831,
+      "learning_rate": 0.003,
+      "loss": 4.1262,
+      "step": 3649
+    },
+    {
+      "epoch": 0.0365,
+      "grad_norm": 0.956526663147831,
+      "learning_rate": 0.003,
+      "loss": 4.1081,
+      "step": 3650
+    },
+    {
+      "epoch": 0.03651,
+      "grad_norm": 0.8484726129203977,
+      "learning_rate": 0.003,
+      "loss": 4.1246,
+      "step": 3651
+    },
+    {
+      "epoch": 0.03652,
+      "grad_norm": 0.7894465078154086,
+      "learning_rate": 0.003,
+      "loss": 4.1058,
+      "step": 3652
+    },
+    {
+      "epoch": 0.03653,
+      "grad_norm": 0.6892303382737016,
+      "learning_rate": 0.003,
+      "loss": 4.0903,
+      "step": 3653
+    },
+    {
+      "epoch": 0.03654,
+      "grad_norm": 0.6682166624701149,
+      "learning_rate": 0.003,
+      "loss": 4.0881,
+      "step": 3654
+    },
+    {
+      "epoch": 0.03655,
+      "grad_norm": 0.6644222696456153,
+      "learning_rate": 0.003,
+      "loss": 4.1114,
+      "step": 3655
+    },
+    {
+      "epoch": 0.03656,
+      "grad_norm": 0.6972374084601054,
+      "learning_rate": 0.003,
+      "loss": 4.0992,
+      "step": 3656
+    },
+    {
+      "epoch": 0.03657,
+      "grad_norm": 0.6757860783494947,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 3657
+    },
+    {
+      "epoch": 0.03658,
+      "grad_norm": 0.7083328895452327,
+      "learning_rate": 0.003,
+      "loss": 4.1336,
+      "step": 3658
+    },
+    {
+      "epoch": 0.03659,
+      "grad_norm": 0.7718946593018728,
+      "learning_rate": 0.003,
+      "loss": 4.0862,
+      "step": 3659
+    },
+    {
+      "epoch": 0.0366,
+      "grad_norm": 0.7376732318109632,
+      "learning_rate": 0.003,
+      "loss": 4.1142,
+      "step": 3660
+    },
+    {
+      "epoch": 0.03661,
+      "grad_norm": 0.8048594254747962,
+      "learning_rate": 0.003,
+      "loss": 4.0967,
+      "step": 3661
+    },
+    {
+      "epoch": 0.03662,
+      "grad_norm": 0.9041684527522658,
+      "learning_rate": 0.003,
+      "loss": 4.0936,
+      "step": 3662
+    },
+    {
+      "epoch": 0.03663,
+      "grad_norm": 0.8857606200128569,
+      "learning_rate": 0.003,
+      "loss": 4.1121,
+      "step": 3663
+    },
+    {
+      "epoch": 0.03664,
+      "grad_norm": 0.9256759603717064,
+      "learning_rate": 0.003,
+      "loss": 4.0945,
+      "step": 3664
+    },
+    {
+      "epoch": 0.03665,
+      "grad_norm": 0.9238563202936576,
+      "learning_rate": 0.003,
+      "loss": 4.1202,
+      "step": 3665
+    },
+    {
+      "epoch": 0.03666,
+      "grad_norm": 0.8203365130003366,
+      "learning_rate": 0.003,
+      "loss": 4.074,
+      "step": 3666
+    },
+    {
+      "epoch": 0.03667,
+      "grad_norm": 0.9024471801382937,
+      "learning_rate": 0.003,
+      "loss": 4.0876,
+      "step": 3667
+    },
+    {
+      "epoch": 0.03668,
+      "grad_norm": 1.0552967059744773,
+      "learning_rate": 0.003,
+      "loss": 4.1067,
+      "step": 3668
+    },
+    {
+      "epoch": 0.03669,
+      "grad_norm": 1.0820390277779695,
+      "learning_rate": 0.003,
+      "loss": 4.0894,
+      "step": 3669
+    },
+    {
+      "epoch": 0.0367,
+      "grad_norm": 0.9454674065362177,
+      "learning_rate": 0.003,
+      "loss": 4.1199,
+      "step": 3670
+    },
+    {
+      "epoch": 0.03671,
+      "grad_norm": 1.0059330853811892,
+      "learning_rate": 0.003,
+      "loss": 4.1144,
+      "step": 3671
+    },
+    {
+      "epoch": 0.03672,
+      "grad_norm": 1.2723317844660584,
+      "learning_rate": 0.003,
+      "loss": 4.1282,
+      "step": 3672
+    },
+    {
+      "epoch": 0.03673,
+      "grad_norm": 0.8050766891971283,
+      "learning_rate": 0.003,
+      "loss": 4.1214,
+      "step": 3673
+    },
+    {
+      "epoch": 0.03674,
+      "grad_norm": 0.8682507056177615,
+      "learning_rate": 0.003,
+      "loss": 4.0873,
+      "step": 3674
+    },
+    {
+      "epoch": 0.03675,
+      "grad_norm": 1.0276133687150726,
+      "learning_rate": 0.003,
+      "loss": 4.1309,
+      "step": 3675
+    },
+    {
+      "epoch": 0.03676,
+      "grad_norm": 1.0466449018255517,
+      "learning_rate": 0.003,
+      "loss": 4.0901,
+      "step": 3676
+    },
+    {
+      "epoch": 0.03677,
+      "grad_norm": 0.9830229711282336,
+      "learning_rate": 0.003,
+      "loss": 4.1062,
+      "step": 3677
+    },
+    {
+      "epoch": 0.03678,
+      "grad_norm": 0.9217069489116423,
+      "learning_rate": 0.003,
+      "loss": 4.1303,
+      "step": 3678
+    },
+    {
+      "epoch": 0.03679,
+      "grad_norm": 1.0822177384710947,
+      "learning_rate": 0.003,
+      "loss": 4.1232,
+      "step": 3679
+    },
+    {
+      "epoch": 0.0368,
+      "grad_norm": 1.0907486819128838,
+      "learning_rate": 0.003,
+      "loss": 4.1207,
+      "step": 3680
+    },
+    {
+      "epoch": 0.03681,
+      "grad_norm": 0.8227793104650016,
+      "learning_rate": 0.003,
+      "loss": 4.0847,
+      "step": 3681
+    },
+    {
+      "epoch": 0.03682,
+      "grad_norm": 0.7808126016328544,
+      "learning_rate": 0.003,
+      "loss": 4.1234,
+      "step": 3682
+    },
+    {
+      "epoch": 0.03683,
+      "grad_norm": 0.7174400556511353,
+      "learning_rate": 0.003,
+      "loss": 4.0807,
+      "step": 3683
+    },
+    {
+      "epoch": 0.03684,
+      "grad_norm": 0.6775479041332162,
+      "learning_rate": 0.003,
+      "loss": 4.104,
+      "step": 3684
+    },
+    {
+      "epoch": 0.03685,
+      "grad_norm": 0.6420088354614908,
+      "learning_rate": 0.003,
+      "loss": 4.0865,
+      "step": 3685
+    },
+    {
+      "epoch": 0.03686,
+      "grad_norm": 0.598867465128322,
+      "learning_rate": 0.003,
+      "loss": 4.1055,
+      "step": 3686
+    },
+    {
+      "epoch": 0.03687,
+      "grad_norm": 0.6115731068077563,
+      "learning_rate": 0.003,
+      "loss": 4.1238,
+      "step": 3687
+    },
+    {
+      "epoch": 0.03688,
+      "grad_norm": 0.7067511764658732,
+      "learning_rate": 0.003,
+      "loss": 4.1159,
+      "step": 3688
+    },
+    {
+      "epoch": 0.03689,
+      "grad_norm": 0.7020192080483855,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 3689
+    },
+    {
+      "epoch": 0.0369,
+      "grad_norm": 0.6267089096242754,
+      "learning_rate": 0.003,
+      "loss": 4.1099,
+      "step": 3690
+    },
+    {
+      "epoch": 0.03691,
+      "grad_norm": 0.5456674081937926,
+      "learning_rate": 0.003,
+      "loss": 4.0896,
+      "step": 3691
+    },
+    {
+      "epoch": 0.03692,
+      "grad_norm": 0.6082919076452641,
+      "learning_rate": 0.003,
+      "loss": 4.127,
+      "step": 3692
+    },
+    {
+      "epoch": 0.03693,
+      "grad_norm": 0.7939340117807796,
+      "learning_rate": 0.003,
+      "loss": 4.0993,
+      "step": 3693
+    },
+    {
+      "epoch": 0.03694,
+      "grad_norm": 1.0013591289075594,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 3694
+    },
+    {
+      "epoch": 0.03695,
+      "grad_norm": 1.022061020358787,
+      "learning_rate": 0.003,
+      "loss": 4.1043,
+      "step": 3695
+    },
+    {
+      "epoch": 0.03696,
+      "grad_norm": 0.6767445565263814,
+      "learning_rate": 0.003,
+      "loss": 4.0932,
+      "step": 3696
+    },
+    {
+      "epoch": 0.03697,
+      "grad_norm": 0.6543502581869196,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 3697
+    },
+    {
+      "epoch": 0.03698,
+      "grad_norm": 0.898978343569856,
+      "learning_rate": 0.003,
+      "loss": 4.1225,
+      "step": 3698
+    },
+    {
+      "epoch": 0.03699,
+      "grad_norm": 0.8905816524859695,
+      "learning_rate": 0.003,
+      "loss": 4.1114,
+      "step": 3699
+    },
+    {
+      "epoch": 0.037,
+      "grad_norm": 0.7721797452363818,
+      "learning_rate": 0.003,
+      "loss": 4.0978,
+      "step": 3700
+    },
+    {
+      "epoch": 0.03701,
+      "grad_norm": 0.9150903630097929,
+      "learning_rate": 0.003,
+      "loss": 4.1031,
+      "step": 3701
+    },
+    {
+      "epoch": 0.03702,
+      "grad_norm": 0.846227592035782,
+      "learning_rate": 0.003,
+      "loss": 4.0942,
+      "step": 3702
+    },
+    {
+      "epoch": 0.03703,
+      "grad_norm": 0.797196639545616,
+      "learning_rate": 0.003,
+      "loss": 4.105,
+      "step": 3703
+    },
+    {
+      "epoch": 0.03704,
+      "grad_norm": 0.9810543408588123,
+      "learning_rate": 0.003,
+      "loss": 4.1134,
+      "step": 3704
+    },
+    {
+      "epoch": 0.03705,
+      "grad_norm": 1.0722834307776175,
+      "learning_rate": 0.003,
+      "loss": 4.1352,
+      "step": 3705
+    },
+    {
+      "epoch": 0.03706,
+      "grad_norm": 0.9721358338604488,
+      "learning_rate": 0.003,
+      "loss": 4.1164,
+      "step": 3706
+    },
+    {
+      "epoch": 0.03707,
+      "grad_norm": 0.9790738664900162,
+      "learning_rate": 0.003,
+      "loss": 4.1016,
+      "step": 3707
+    },
+    {
+      "epoch": 0.03708,
+      "grad_norm": 0.9353887990568662,
+      "learning_rate": 0.003,
+      "loss": 4.1207,
+      "step": 3708
+    },
+    {
+      "epoch": 0.03709,
+      "grad_norm": 0.9856206989122267,
+      "learning_rate": 0.003,
+      "loss": 4.1007,
+      "step": 3709
+    },
+    {
+      "epoch": 0.0371,
+      "grad_norm": 1.0451787463045008,
+      "learning_rate": 0.003,
+      "loss": 4.138,
+      "step": 3710
+    },
+    {
+      "epoch": 0.03711,
+      "grad_norm": 1.0753305901256527,
+      "learning_rate": 0.003,
+      "loss": 4.1107,
+      "step": 3711
+    },
+    {
+      "epoch": 0.03712,
+      "grad_norm": 0.9432866794357371,
+      "learning_rate": 0.003,
+      "loss": 4.1099,
+      "step": 3712
+    },
+    {
+      "epoch": 0.03713,
+      "grad_norm": 1.0228561402780971,
+      "learning_rate": 0.003,
+      "loss": 4.1028,
+      "step": 3713
+    },
+    {
+      "epoch": 0.03714,
+      "grad_norm": 1.0155399617885337,
+      "learning_rate": 0.003,
+      "loss": 4.1147,
+      "step": 3714
+    },
+    {
+      "epoch": 0.03715,
+      "grad_norm": 0.9501309057637405,
+      "learning_rate": 0.003,
+      "loss": 4.1227,
+      "step": 3715
+    },
+    {
+      "epoch": 0.03716,
+      "grad_norm": 0.8368341993061407,
+      "learning_rate": 0.003,
+      "loss": 4.1051,
+      "step": 3716
+    },
+    {
+      "epoch": 0.03717,
+      "grad_norm": 0.8200844350513596,
+      "learning_rate": 0.003,
+      "loss": 4.1116,
+      "step": 3717
+    },
+    {
+      "epoch": 0.03718,
+      "grad_norm": 0.8192379479874041,
+      "learning_rate": 0.003,
+      "loss": 4.0813,
+      "step": 3718
+    },
+    {
+      "epoch": 0.03719,
+      "grad_norm": 0.9007315714309677,
+      "learning_rate": 0.003,
+      "loss": 4.1138,
+      "step": 3719
+    },
+    {
+      "epoch": 0.0372,
+      "grad_norm": 0.9179074609676963,
+      "learning_rate": 0.003,
+      "loss": 4.1166,
+      "step": 3720
+    },
+    {
+      "epoch": 0.03721,
+      "grad_norm": 0.848049967304013,
+      "learning_rate": 0.003,
+      "loss": 4.1026,
+      "step": 3721
+    },
+    {
+      "epoch": 0.03722,
+      "grad_norm": 0.7009453544906782,
+      "learning_rate": 0.003,
+      "loss": 4.0992,
+      "step": 3722
+    },
+    {
+      "epoch": 0.03723,
+      "grad_norm": 0.6973322955990157,
+      "learning_rate": 0.003,
+      "loss": 4.1295,
+      "step": 3723
+    },
+    {
+      "epoch": 0.03724,
+      "grad_norm": 0.6858684932844772,
+      "learning_rate": 0.003,
+      "loss": 4.1048,
+      "step": 3724
+    },
+    {
+      "epoch": 0.03725,
+      "grad_norm": 0.5759232117551641,
+      "learning_rate": 0.003,
+      "loss": 4.1114,
+      "step": 3725
+    },
+    {
+      "epoch": 0.03726,
+      "grad_norm": 0.7355358643002436,
+      "learning_rate": 0.003,
+      "loss": 4.0888,
+      "step": 3726
+    },
+    {
+      "epoch": 0.03727,
+      "grad_norm": 0.975453002086878,
+      "learning_rate": 0.003,
+      "loss": 4.128,
+      "step": 3727
+    },
+    {
+      "epoch": 0.03728,
+      "grad_norm": 1.2871266145714035,
+      "learning_rate": 0.003,
+      "loss": 4.0871,
+      "step": 3728
+    },
+    {
+      "epoch": 0.03729,
+      "grad_norm": 0.6800175356883061,
+      "learning_rate": 0.003,
+      "loss": 4.1097,
+      "step": 3729
+    },
+    {
+      "epoch": 0.0373,
+      "grad_norm": 0.8995970728462417,
+      "learning_rate": 0.003,
+      "loss": 4.0995,
+      "step": 3730
+    },
+    {
+      "epoch": 0.03731,
+      "grad_norm": 0.8748991092762279,
+      "learning_rate": 0.003,
+      "loss": 4.1225,
+      "step": 3731
+    },
+    {
+      "epoch": 0.03732,
+      "grad_norm": 0.7674871139345869,
+      "learning_rate": 0.003,
+      "loss": 4.1157,
+      "step": 3732
+    },
+    {
+      "epoch": 0.03733,
+      "grad_norm": 0.7725141224382513,
+      "learning_rate": 0.003,
+      "loss": 4.0939,
+      "step": 3733
+    },
+    {
+      "epoch": 0.03734,
+      "grad_norm": 0.923649436345944,
+      "learning_rate": 0.003,
+      "loss": 4.101,
+      "step": 3734
+    },
+    {
+      "epoch": 0.03735,
+      "grad_norm": 0.85452781445576,
+      "learning_rate": 0.003,
+      "loss": 4.0922,
+      "step": 3735
+    },
+    {
+      "epoch": 0.03736,
+      "grad_norm": 0.8691053620191994,
+      "learning_rate": 0.003,
+      "loss": 4.1222,
+      "step": 3736
+    },
+    {
+      "epoch": 0.03737,
+      "grad_norm": 0.8505997578362463,
+      "learning_rate": 0.003,
+      "loss": 4.1089,
+      "step": 3737
+    },
+    {
+      "epoch": 0.03738,
+      "grad_norm": 0.7742946039499282,
+      "learning_rate": 0.003,
+      "loss": 4.1245,
+      "step": 3738
+    },
+    {
+      "epoch": 0.03739,
+      "grad_norm": 0.7994977680328913,
+      "learning_rate": 0.003,
+      "loss": 4.0899,
+      "step": 3739
+    },
+    {
+      "epoch": 0.0374,
+      "grad_norm": 0.8536215986910768,
+      "learning_rate": 0.003,
+      "loss": 4.1105,
+      "step": 3740
+    },
+    {
+      "epoch": 0.03741,
+      "grad_norm": 0.9409826319598602,
+      "learning_rate": 0.003,
+      "loss": 4.0963,
+      "step": 3741
+    },
+    {
+      "epoch": 0.03742,
+      "grad_norm": 0.8593777917195926,
+      "learning_rate": 0.003,
+      "loss": 4.0892,
+      "step": 3742
+    },
+    {
+      "epoch": 0.03743,
+      "grad_norm": 0.7563917031775911,
+      "learning_rate": 0.003,
+      "loss": 4.0831,
+      "step": 3743
+    },
+    {
+      "epoch": 0.03744,
+      "grad_norm": 0.7103064287871557,
+      "learning_rate": 0.003,
+      "loss": 4.1122,
+      "step": 3744
+    },
+    {
+      "epoch": 0.03745,
+      "grad_norm": 0.7615538447680247,
+      "learning_rate": 0.003,
+      "loss": 4.0968,
+      "step": 3745
+    },
+    {
+      "epoch": 0.03746,
+      "grad_norm": 0.9955801956257352,
+      "learning_rate": 0.003,
+      "loss": 4.1095,
+      "step": 3746
+    },
+    {
+      "epoch": 0.03747,
+      "grad_norm": 1.0171288172949948,
+      "learning_rate": 0.003,
+      "loss": 4.1216,
+      "step": 3747
+    },
+    {
+      "epoch": 0.03748,
+      "grad_norm": 0.7896231689985856,
+      "learning_rate": 0.003,
+      "loss": 4.0881,
+      "step": 3748
+    },
+    {
+      "epoch": 0.03749,
+      "grad_norm": 0.6375567183654871,
+      "learning_rate": 0.003,
+      "loss": 4.095,
+      "step": 3749
+    },
+    {
+      "epoch": 0.0375,
+      "grad_norm": 0.7372117367719312,
+      "learning_rate": 0.003,
+      "loss": 4.1199,
+      "step": 3750
+    },
+    {
+      "epoch": 0.03751,
+      "grad_norm": 0.7461639244591386,
+      "learning_rate": 0.003,
+      "loss": 4.0717,
+      "step": 3751
+    },
+    {
+      "epoch": 0.03752,
+      "grad_norm": 0.7827559728941343,
+      "learning_rate": 0.003,
+      "loss": 4.1018,
+      "step": 3752
+    },
+    {
+      "epoch": 0.03753,
+      "grad_norm": 0.7771731000337748,
+      "learning_rate": 0.003,
+      "loss": 4.1046,
+      "step": 3753
+    },
+    {
+      "epoch": 0.03754,
+      "grad_norm": 1.0118496549896825,
+      "learning_rate": 0.003,
+      "loss": 4.1181,
+      "step": 3754
+    },
+    {
+      "epoch": 0.03755,
+      "grad_norm": 1.2993270154749912,
+      "learning_rate": 0.003,
+      "loss": 4.124,
+      "step": 3755
+    },
+    {
+      "epoch": 0.03756,
+      "grad_norm": 0.8028019558848566,
+      "learning_rate": 0.003,
+      "loss": 4.0805,
+      "step": 3756
+    },
+    {
+      "epoch": 0.03757,
+      "grad_norm": 0.9071996665258343,
+      "learning_rate": 0.003,
+      "loss": 4.1147,
+      "step": 3757
+    },
+    {
+      "epoch": 0.03758,
+      "grad_norm": 0.8773892948854304,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 3758
+    },
+    {
+      "epoch": 0.03759,
+      "grad_norm": 0.8198221769200625,
+      "learning_rate": 0.003,
+      "loss": 4.1152,
+      "step": 3759
+    },
+    {
+      "epoch": 0.0376,
+      "grad_norm": 0.8540457248460523,
+      "learning_rate": 0.003,
+      "loss": 4.1047,
+      "step": 3760
+    },
+    {
+      "epoch": 0.03761,
+      "grad_norm": 0.7667243415560708,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 3761
+    },
+    {
+      "epoch": 0.03762,
+      "grad_norm": 0.8865050328861852,
+      "learning_rate": 0.003,
+      "loss": 4.1272,
+      "step": 3762
+    },
+    {
+      "epoch": 0.03763,
+      "grad_norm": 0.9591731803168125,
+      "learning_rate": 0.003,
+      "loss": 4.1125,
+      "step": 3763
+    },
+    {
+      "epoch": 0.03764,
+      "grad_norm": 0.9480606119145719,
+      "learning_rate": 0.003,
+      "loss": 4.125,
+      "step": 3764
+    },
+    {
+      "epoch": 0.03765,
+      "grad_norm": 0.9684597356680353,
+      "learning_rate": 0.003,
+      "loss": 4.1473,
+      "step": 3765
+    },
+    {
+      "epoch": 0.03766,
+      "grad_norm": 0.8344731198089695,
+      "learning_rate": 0.003,
+      "loss": 4.0824,
+      "step": 3766
+    },
+    {
+      "epoch": 0.03767,
+      "grad_norm": 0.7765792550643911,
+      "learning_rate": 0.003,
+      "loss": 4.1232,
+      "step": 3767
+    },
+    {
+      "epoch": 0.03768,
+      "grad_norm": 0.7642334739357078,
+      "learning_rate": 0.003,
+      "loss": 4.098,
+      "step": 3768
+    },
+    {
+      "epoch": 0.03769,
+      "grad_norm": 0.7211211499394363,
+      "learning_rate": 0.003,
+      "loss": 4.089,
+      "step": 3769
+    },
+    {
+      "epoch": 0.0377,
+      "grad_norm": 0.6106973291196678,
+      "learning_rate": 0.003,
+      "loss": 4.0668,
+      "step": 3770
+    },
+    {
+      "epoch": 0.03771,
+      "grad_norm": 0.5628256340241241,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 3771
+    },
+    {
+      "epoch": 0.03772,
+      "grad_norm": 0.5845993807831852,
+      "learning_rate": 0.003,
+      "loss": 4.1005,
+      "step": 3772
+    },
+    {
+      "epoch": 0.03773,
+      "grad_norm": 0.6697980766862262,
+      "learning_rate": 0.003,
+      "loss": 4.1105,
+      "step": 3773
+    },
+    {
+      "epoch": 0.03774,
+      "grad_norm": 0.8252901528282155,
+      "learning_rate": 0.003,
+      "loss": 4.0962,
+      "step": 3774
+    },
+    {
+      "epoch": 0.03775,
+      "grad_norm": 0.9517527171183254,
+      "learning_rate": 0.003,
+      "loss": 4.1173,
+      "step": 3775
+    },
+    {
+      "epoch": 0.03776,
+      "grad_norm": 1.0897263182368793,
+      "learning_rate": 0.003,
+      "loss": 4.1013,
+      "step": 3776
+    },
+    {
+      "epoch": 0.03777,
+      "grad_norm": 0.8653535869810033,
+      "learning_rate": 0.003,
+      "loss": 4.1103,
+      "step": 3777
+    },
+    {
+      "epoch": 0.03778,
+      "grad_norm": 0.7342155379460369,
+      "learning_rate": 0.003,
+      "loss": 4.0844,
+      "step": 3778
+    },
+    {
+      "epoch": 0.03779,
+      "grad_norm": 0.7679190695396096,
+      "learning_rate": 0.003,
+      "loss": 4.0992,
+      "step": 3779
+    },
+    {
+      "epoch": 0.0378,
+      "grad_norm": 0.8679609457918785,
+      "learning_rate": 0.003,
+      "loss": 4.1037,
+      "step": 3780
+    },
+    {
+      "epoch": 0.03781,
+      "grad_norm": 1.0049631112041457,
+      "learning_rate": 0.003,
+      "loss": 4.1356,
+      "step": 3781
+    },
+    {
+      "epoch": 0.03782,
+      "grad_norm": 1.092459674146767,
+      "learning_rate": 0.003,
+      "loss": 4.1145,
+      "step": 3782
+    },
+    {
+      "epoch": 0.03783,
+      "grad_norm": 0.9622890806365146,
+      "learning_rate": 0.003,
+      "loss": 4.1132,
+      "step": 3783
+    },
+    {
+      "epoch": 0.03784,
+      "grad_norm": 1.0671033317728673,
+      "learning_rate": 0.003,
+      "loss": 4.1163,
+      "step": 3784
+    },
+    {
+      "epoch": 0.03785,
+      "grad_norm": 1.0407807642051592,
+      "learning_rate": 0.003,
+      "loss": 4.1009,
+      "step": 3785
+    },
+    {
+      "epoch": 0.03786,
+      "grad_norm": 1.125690448525855,
+      "learning_rate": 0.003,
+      "loss": 4.1206,
+      "step": 3786
+    },
+    {
+      "epoch": 0.03787,
+      "grad_norm": 1.0253559423721736,
+      "learning_rate": 0.003,
+      "loss": 4.108,
+      "step": 3787
+    },
+    {
+      "epoch": 0.03788,
+      "grad_norm": 1.0136069570600932,
+      "learning_rate": 0.003,
+      "loss": 4.1383,
+      "step": 3788
+    },
+    {
+      "epoch": 0.03789,
+      "grad_norm": 1.1118575565083433,
+      "learning_rate": 0.003,
+      "loss": 4.0781,
+      "step": 3789
+    },
+    {
+      "epoch": 0.0379,
+      "grad_norm": 0.7950650221590647,
+      "learning_rate": 0.003,
+      "loss": 4.0936,
+      "step": 3790
+    },
+    {
+      "epoch": 0.03791,
+      "grad_norm": 0.6642703034618491,
+      "learning_rate": 0.003,
+      "loss": 4.102,
+      "step": 3791
+    },
+    {
+      "epoch": 0.03792,
+      "grad_norm": 0.6118532884669681,
+      "learning_rate": 0.003,
+      "loss": 4.131,
+      "step": 3792
+    },
+    {
+      "epoch": 0.03793,
+      "grad_norm": 0.6538749880963972,
+      "learning_rate": 0.003,
+      "loss": 4.1243,
+      "step": 3793
+    },
+    {
+      "epoch": 0.03794,
+      "grad_norm": 0.7185276449261756,
+      "learning_rate": 0.003,
+      "loss": 4.1251,
+      "step": 3794
+    },
+    {
+      "epoch": 0.03795,
+      "grad_norm": 0.8476725115524525,
+      "learning_rate": 0.003,
+      "loss": 4.1113,
+      "step": 3795
+    },
+    {
+      "epoch": 0.03796,
+      "grad_norm": 0.912186760325603,
+      "learning_rate": 0.003,
+      "loss": 4.0943,
+      "step": 3796
+    },
+    {
+      "epoch": 0.03797,
+      "grad_norm": 0.8535289933328677,
+      "learning_rate": 0.003,
+      "loss": 4.0821,
+      "step": 3797
+    },
+    {
+      "epoch": 0.03798,
+      "grad_norm": 0.8439916131790419,
+      "learning_rate": 0.003,
+      "loss": 4.0994,
+      "step": 3798
+    },
+    {
+      "epoch": 0.03799,
+      "grad_norm": 0.977315385277912,
+      "learning_rate": 0.003,
+      "loss": 4.1126,
+      "step": 3799
+    },
+    {
+      "epoch": 0.038,
+      "grad_norm": 1.2434381910310124,
+      "learning_rate": 0.003,
+      "loss": 4.1299,
+      "step": 3800
+    },
+    {
+      "epoch": 0.03801,
+      "grad_norm": 0.8428497034893558,
+      "learning_rate": 0.003,
+      "loss": 4.1253,
+      "step": 3801
+    },
+    {
+      "epoch": 0.03802,
+      "grad_norm": 0.7525631044925924,
+      "learning_rate": 0.003,
+      "loss": 4.0961,
+      "step": 3802
+    },
+    {
+      "epoch": 0.03803,
+      "grad_norm": 0.8068378132076305,
+      "learning_rate": 0.003,
+      "loss": 4.1244,
+      "step": 3803
+    },
+    {
+      "epoch": 0.03804,
+      "grad_norm": 0.8984500712876651,
+      "learning_rate": 0.003,
+      "loss": 4.0932,
+      "step": 3804
+    },
+    {
+      "epoch": 0.03805,
+      "grad_norm": 0.9405144408554322,
+      "learning_rate": 0.003,
+      "loss": 4.1103,
+      "step": 3805
+    },
+    {
+      "epoch": 0.03806,
+      "grad_norm": 1.019634044722222,
+      "learning_rate": 0.003,
+      "loss": 4.1129,
+      "step": 3806
+    },
+    {
+      "epoch": 0.03807,
+      "grad_norm": 0.8853714667588649,
+      "learning_rate": 0.003,
+      "loss": 4.1011,
+      "step": 3807
+    },
+    {
+      "epoch": 0.03808,
+      "grad_norm": 0.9597650598957446,
+      "learning_rate": 0.003,
+      "loss": 4.1172,
+      "step": 3808
+    },
+    {
+      "epoch": 0.03809,
+      "grad_norm": 1.067683764868459,
+      "learning_rate": 0.003,
+      "loss": 4.1028,
+      "step": 3809
+    },
+    {
+      "epoch": 0.0381,
+      "grad_norm": 0.9853327608719147,
+      "learning_rate": 0.003,
+      "loss": 4.0906,
+      "step": 3810
+    },
+    {
+      "epoch": 0.03811,
+      "grad_norm": 1.1386409326086608,
+      "learning_rate": 0.003,
+      "loss": 4.1243,
+      "step": 3811
+    },
+    {
+      "epoch": 0.03812,
+      "grad_norm": 0.8785836941800041,
+      "learning_rate": 0.003,
+      "loss": 4.1117,
+      "step": 3812
+    },
+    {
+      "epoch": 0.03813,
+      "grad_norm": 0.719604168428238,
+      "learning_rate": 0.003,
+      "loss": 4.0807,
+      "step": 3813
+    },
+    {
+      "epoch": 0.03814,
+      "grad_norm": 0.8437531723051056,
+      "learning_rate": 0.003,
+      "loss": 4.1129,
+      "step": 3814
+    },
+    {
+      "epoch": 0.03815,
+      "grad_norm": 0.9017994260619784,
+      "learning_rate": 0.003,
+      "loss": 4.1038,
+      "step": 3815
+    },
+    {
+      "epoch": 0.03816,
+      "grad_norm": 1.0098474951212937,
+      "learning_rate": 0.003,
+      "loss": 4.107,
+      "step": 3816
+    },
+    {
+      "epoch": 0.03817,
+      "grad_norm": 0.9361762835503497,
+      "learning_rate": 0.003,
+      "loss": 4.0878,
+      "step": 3817
+    },
+    {
+      "epoch": 0.03818,
+      "grad_norm": 1.037066830030048,
+      "learning_rate": 0.003,
+      "loss": 4.1377,
+      "step": 3818
+    },
+    {
+      "epoch": 0.03819,
+      "grad_norm": 1.156007843641428,
+      "learning_rate": 0.003,
+      "loss": 4.0914,
+      "step": 3819
+    },
+    {
+      "epoch": 0.0382,
+      "grad_norm": 0.9923419777257736,
+      "learning_rate": 0.003,
+      "loss": 4.1377,
+      "step": 3820
+    },
+    {
+      "epoch": 0.03821,
+      "grad_norm": 1.1885176826442838,
+      "learning_rate": 0.003,
+      "loss": 4.0908,
+      "step": 3821
+    },
+    {
+      "epoch": 0.03822,
+      "grad_norm": 0.8699977531184279,
+      "learning_rate": 0.003,
+      "loss": 4.1066,
+      "step": 3822
+    },
+    {
+      "epoch": 0.03823,
+      "grad_norm": 0.9310049147972063,
+      "learning_rate": 0.003,
+      "loss": 4.1051,
+      "step": 3823
+    },
+    {
+      "epoch": 0.03824,
+      "grad_norm": 1.0606557350696768,
+      "learning_rate": 0.003,
+      "loss": 4.1039,
+      "step": 3824
+    },
+    {
+      "epoch": 0.03825,
+      "grad_norm": 0.8761874314983482,
+      "learning_rate": 0.003,
+      "loss": 4.1167,
+      "step": 3825
+    },
+    {
+      "epoch": 0.03826,
+      "grad_norm": 0.709566654404231,
+      "learning_rate": 0.003,
+      "loss": 4.0953,
+      "step": 3826
+    },
+    {
+      "epoch": 0.03827,
+      "grad_norm": 0.665621012314629,
+      "learning_rate": 0.003,
+      "loss": 4.1211,
+      "step": 3827
+    },
+    {
+      "epoch": 0.03828,
+      "grad_norm": 0.6539927553498923,
+      "learning_rate": 0.003,
+      "loss": 4.0957,
+      "step": 3828
+    },
+    {
+      "epoch": 0.03829,
+      "grad_norm": 0.6659373242823122,
+      "learning_rate": 0.003,
+      "loss": 4.1003,
+      "step": 3829
+    },
+    {
+      "epoch": 0.0383,
+      "grad_norm": 0.719614104181746,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 3830
+    },
+    {
+      "epoch": 0.03831,
+      "grad_norm": 0.7380247193128887,
+      "learning_rate": 0.003,
+      "loss": 4.0917,
+      "step": 3831
+    },
+    {
+      "epoch": 0.03832,
+      "grad_norm": 0.7602873975407207,
+      "learning_rate": 0.003,
+      "loss": 4.0885,
+      "step": 3832
+    },
+    {
+      "epoch": 0.03833,
+      "grad_norm": 0.7801412356291381,
+      "learning_rate": 0.003,
+      "loss": 4.091,
+      "step": 3833
+    },
+    {
+      "epoch": 0.03834,
+      "grad_norm": 1.035527155697683,
+      "learning_rate": 0.003,
+      "loss": 4.1267,
+      "step": 3834
+    },
+    {
+      "epoch": 0.03835,
+      "grad_norm": 1.2017267869538604,
+      "learning_rate": 0.003,
+      "loss": 4.1248,
+      "step": 3835
+    },
+    {
+      "epoch": 0.03836,
+      "grad_norm": 0.7780948979729766,
+      "learning_rate": 0.003,
+      "loss": 4.1159,
+      "step": 3836
+    },
+    {
+      "epoch": 0.03837,
+      "grad_norm": 0.6833191218840556,
+      "learning_rate": 0.003,
+      "loss": 4.0998,
+      "step": 3837
+    },
+    {
+      "epoch": 0.03838,
+      "grad_norm": 0.8599951602639524,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 3838
+    },
+    {
+      "epoch": 0.03839,
+      "grad_norm": 0.9833951212613625,
+      "learning_rate": 0.003,
+      "loss": 4.0983,
+      "step": 3839
+    },
+    {
+      "epoch": 0.0384,
+      "grad_norm": 1.1478082520006916,
+      "learning_rate": 0.003,
+      "loss": 4.1098,
+      "step": 3840
+    },
+    {
+      "epoch": 0.03841,
+      "grad_norm": 0.8829906180137885,
+      "learning_rate": 0.003,
+      "loss": 4.1072,
+      "step": 3841
+    },
+    {
+      "epoch": 0.03842,
+      "grad_norm": 0.7834869010490613,
+      "learning_rate": 0.003,
+      "loss": 4.1048,
+      "step": 3842
+    },
+    {
+      "epoch": 0.03843,
+      "grad_norm": 0.7188713056155882,
+      "learning_rate": 0.003,
+      "loss": 4.0917,
+      "step": 3843
+    },
+    {
+      "epoch": 0.03844,
+      "grad_norm": 0.7540179942394175,
+      "learning_rate": 0.003,
+      "loss": 4.0985,
+      "step": 3844
+    },
+    {
+      "epoch": 0.03845,
+      "grad_norm": 0.7463415722264508,
+      "learning_rate": 0.003,
+      "loss": 4.0979,
+      "step": 3845
+    },
+    {
+      "epoch": 0.03846,
+      "grad_norm": 0.7138861193622222,
+      "learning_rate": 0.003,
+      "loss": 4.1121,
+      "step": 3846
+    },
+    {
+      "epoch": 0.03847,
+      "grad_norm": 0.648731470870398,
+      "learning_rate": 0.003,
+      "loss": 4.0983,
+      "step": 3847
+    },
+    {
+      "epoch": 0.03848,
+      "grad_norm": 0.7547795493913247,
+      "learning_rate": 0.003,
+      "loss": 4.0875,
+      "step": 3848
+    },
+    {
+      "epoch": 0.03849,
+      "grad_norm": 0.799301070310209,
+      "learning_rate": 0.003,
+      "loss": 4.1026,
+      "step": 3849
+    },
+    {
+      "epoch": 0.0385,
+      "grad_norm": 0.9203078148453605,
+      "learning_rate": 0.003,
+      "loss": 4.1155,
+      "step": 3850
+    },
+    {
+      "epoch": 0.03851,
+      "grad_norm": 1.0151353622998731,
+      "learning_rate": 0.003,
+      "loss": 4.0825,
+      "step": 3851
+    },
+    {
+      "epoch": 0.03852,
+      "grad_norm": 1.0144010495517577,
+      "learning_rate": 0.003,
+      "loss": 4.1208,
+      "step": 3852
+    },
+    {
+      "epoch": 0.03853,
+      "grad_norm": 1.0493705006427887,
+      "learning_rate": 0.003,
+      "loss": 4.1176,
+      "step": 3853
+    },
+    {
+      "epoch": 0.03854,
+      "grad_norm": 0.8074337792156867,
+      "learning_rate": 0.003,
+      "loss": 4.103,
+      "step": 3854
+    },
+    {
+      "epoch": 0.03855,
+      "grad_norm": 0.8026707872705597,
+      "learning_rate": 0.003,
+      "loss": 4.1197,
+      "step": 3855
+    },
+    {
+      "epoch": 0.03856,
+      "grad_norm": 0.7804974920617295,
+      "learning_rate": 0.003,
+      "loss": 4.091,
+      "step": 3856
+    },
+    {
+      "epoch": 0.03857,
+      "grad_norm": 0.8054893207885017,
+      "learning_rate": 0.003,
+      "loss": 4.0965,
+      "step": 3857
+    },
+    {
+      "epoch": 0.03858,
+      "grad_norm": 0.8546443108408268,
+      "learning_rate": 0.003,
+      "loss": 4.0935,
+      "step": 3858
+    },
+    {
+      "epoch": 0.03859,
+      "grad_norm": 0.8455331814308655,
+      "learning_rate": 0.003,
+      "loss": 4.1175,
+      "step": 3859
+    },
+    {
+      "epoch": 0.0386,
+      "grad_norm": 0.7390789874576452,
+      "learning_rate": 0.003,
+      "loss": 4.0757,
+      "step": 3860
+    },
+    {
+      "epoch": 0.03861,
+      "grad_norm": 0.7582288889706922,
+      "learning_rate": 0.003,
+      "loss": 4.1151,
+      "step": 3861
+    },
+    {
+      "epoch": 0.03862,
+      "grad_norm": 0.725403944101907,
+      "learning_rate": 0.003,
+      "loss": 4.0906,
+      "step": 3862
+    },
+    {
+      "epoch": 0.03863,
+      "grad_norm": 0.8633932250780971,
+      "learning_rate": 0.003,
+      "loss": 4.09,
+      "step": 3863
+    },
+    {
+      "epoch": 0.03864,
+      "grad_norm": 1.1650833586712557,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 3864
+    },
+    {
+      "epoch": 0.03865,
+      "grad_norm": 0.9893693208520536,
+      "learning_rate": 0.003,
+      "loss": 4.1167,
+      "step": 3865
+    },
+    {
+      "epoch": 0.03866,
+      "grad_norm": 1.0874614460090002,
+      "learning_rate": 0.003,
+      "loss": 4.0949,
+      "step": 3866
+    },
+    {
+      "epoch": 0.03867,
+      "grad_norm": 0.7886630657768378,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 3867
+    },
+    {
+      "epoch": 0.03868,
+      "grad_norm": 0.885679172374958,
+      "learning_rate": 0.003,
+      "loss": 4.108,
+      "step": 3868
+    },
+    {
+      "epoch": 0.03869,
+      "grad_norm": 1.0141213895298733,
+      "learning_rate": 0.003,
+      "loss": 4.0985,
+      "step": 3869
+    },
+    {
+      "epoch": 0.0387,
+      "grad_norm": 1.0688565646198265,
+      "learning_rate": 0.003,
+      "loss": 4.1191,
+      "step": 3870
+    },
+    {
+      "epoch": 0.03871,
+      "grad_norm": 0.9129625883332004,
+      "learning_rate": 0.003,
+      "loss": 4.0893,
+      "step": 3871
+    },
+    {
+      "epoch": 0.03872,
+      "grad_norm": 0.8736530068321784,
+      "learning_rate": 0.003,
+      "loss": 4.0788,
+      "step": 3872
+    },
+    {
+      "epoch": 0.03873,
+      "grad_norm": 0.9127445482435979,
+      "learning_rate": 0.003,
+      "loss": 4.1211,
+      "step": 3873
+    },
+    {
+      "epoch": 0.03874,
+      "grad_norm": 1.1125648535743125,
+      "learning_rate": 0.003,
+      "loss": 4.0982,
+      "step": 3874
+    },
+    {
+      "epoch": 0.03875,
+      "grad_norm": 1.0031183839809612,
+      "learning_rate": 0.003,
+      "loss": 4.1,
+      "step": 3875
+    },
+    {
+      "epoch": 0.03876,
+      "grad_norm": 0.9060582464048182,
+      "learning_rate": 0.003,
+      "loss": 4.1299,
+      "step": 3876
+    },
+    {
+      "epoch": 0.03877,
+      "grad_norm": 0.8392586887613771,
+      "learning_rate": 0.003,
+      "loss": 4.1084,
+      "step": 3877
+    },
+    {
+      "epoch": 0.03878,
+      "grad_norm": 0.8001456111460843,
+      "learning_rate": 0.003,
+      "loss": 4.1209,
+      "step": 3878
+    },
+    {
+      "epoch": 0.03879,
+      "grad_norm": 0.7457591238961055,
+      "learning_rate": 0.003,
+      "loss": 4.1183,
+      "step": 3879
+    },
+    {
+      "epoch": 0.0388,
+      "grad_norm": 0.6124933426277533,
+      "learning_rate": 0.003,
+      "loss": 4.0806,
+      "step": 3880
+    },
+    {
+      "epoch": 0.03881,
+      "grad_norm": 0.6131747582366786,
+      "learning_rate": 0.003,
+      "loss": 4.1053,
+      "step": 3881
+    },
+    {
+      "epoch": 0.03882,
+      "grad_norm": 0.6113752675559317,
+      "learning_rate": 0.003,
+      "loss": 4.1139,
+      "step": 3882
+    },
+    {
+      "epoch": 0.03883,
+      "grad_norm": 0.6919250963347912,
+      "learning_rate": 0.003,
+      "loss": 4.082,
+      "step": 3883
+    },
+    {
+      "epoch": 0.03884,
+      "grad_norm": 0.7426309370638033,
+      "learning_rate": 0.003,
+      "loss": 4.0938,
+      "step": 3884
+    },
+    {
+      "epoch": 0.03885,
+      "grad_norm": 0.9915545301535461,
+      "learning_rate": 0.003,
+      "loss": 4.1154,
+      "step": 3885
+    },
+    {
+      "epoch": 0.03886,
+      "grad_norm": 1.1987560180799988,
+      "learning_rate": 0.003,
+      "loss": 4.0994,
+      "step": 3886
+    },
+    {
+      "epoch": 0.03887,
+      "grad_norm": 0.8162692852302313,
+      "learning_rate": 0.003,
+      "loss": 4.1024,
+      "step": 3887
+    },
+    {
+      "epoch": 0.03888,
+      "grad_norm": 1.0025366149196155,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 3888
+    },
+    {
+      "epoch": 0.03889,
+      "grad_norm": 1.146222148985757,
+      "learning_rate": 0.003,
+      "loss": 4.108,
+      "step": 3889
+    },
+    {
+      "epoch": 0.0389,
+      "grad_norm": 0.8230126221175265,
+      "learning_rate": 0.003,
+      "loss": 4.0881,
+      "step": 3890
+    },
+    {
+      "epoch": 0.03891,
+      "grad_norm": 0.8705081765994817,
+      "learning_rate": 0.003,
+      "loss": 4.0965,
+      "step": 3891
+    },
+    {
+      "epoch": 0.03892,
+      "grad_norm": 0.7747593450381118,
+      "learning_rate": 0.003,
+      "loss": 4.1087,
+      "step": 3892
+    },
+    {
+      "epoch": 0.03893,
+      "grad_norm": 0.7117098416936728,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 3893
+    },
+    {
+      "epoch": 0.03894,
+      "grad_norm": 0.7380911807082656,
+      "learning_rate": 0.003,
+      "loss": 4.0931,
+      "step": 3894
+    },
+    {
+      "epoch": 0.03895,
+      "grad_norm": 0.9497127876188006,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 3895
+    },
+    {
+      "epoch": 0.03896,
+      "grad_norm": 1.1241836801249476,
+      "learning_rate": 0.003,
+      "loss": 4.1122,
+      "step": 3896
+    },
+    {
+      "epoch": 0.03897,
+      "grad_norm": 0.9331139773236928,
+      "learning_rate": 0.003,
+      "loss": 4.0794,
+      "step": 3897
+    },
+    {
+      "epoch": 0.03898,
+      "grad_norm": 0.8717563333320789,
+      "learning_rate": 0.003,
+      "loss": 4.1109,
+      "step": 3898
+    },
+    {
+      "epoch": 0.03899,
+      "grad_norm": 0.9575796876780424,
+      "learning_rate": 0.003,
+      "loss": 4.1062,
+      "step": 3899
+    },
+    {
+      "epoch": 0.039,
+      "grad_norm": 1.1111071654055427,
+      "learning_rate": 0.003,
+      "loss": 4.1102,
+      "step": 3900
+    },
+    {
+      "epoch": 0.03901,
+      "grad_norm": 0.9101352152485117,
+      "learning_rate": 0.003,
+      "loss": 4.1223,
+      "step": 3901
+    },
+    {
+      "epoch": 0.03902,
+      "grad_norm": 0.9149491392794752,
+      "learning_rate": 0.003,
+      "loss": 4.0934,
+      "step": 3902
+    },
+    {
+      "epoch": 0.03903,
+      "grad_norm": 0.9599549632230119,
+      "learning_rate": 0.003,
+      "loss": 4.1314,
+      "step": 3903
+    },
+    {
+      "epoch": 0.03904,
+      "grad_norm": 0.9977595444316653,
+      "learning_rate": 0.003,
+      "loss": 4.1108,
+      "step": 3904
+    },
+    {
+      "epoch": 0.03905,
+      "grad_norm": 0.9772682724535922,
+      "learning_rate": 0.003,
+      "loss": 4.1314,
+      "step": 3905
+    },
+    {
+      "epoch": 0.03906,
+      "grad_norm": 1.116840612654856,
+      "learning_rate": 0.003,
+      "loss": 4.1091,
+      "step": 3906
+    },
+    {
+      "epoch": 0.03907,
+      "grad_norm": 0.9927827399452427,
+      "learning_rate": 0.003,
+      "loss": 4.1197,
+      "step": 3907
+    },
+    {
+      "epoch": 0.03908,
+      "grad_norm": 1.0513974248161086,
+      "learning_rate": 0.003,
+      "loss": 4.1024,
+      "step": 3908
+    },
+    {
+      "epoch": 0.03909,
+      "grad_norm": 0.951299576586947,
+      "learning_rate": 0.003,
+      "loss": 4.1068,
+      "step": 3909
+    },
+    {
+      "epoch": 0.0391,
+      "grad_norm": 0.9273375029445102,
+      "learning_rate": 0.003,
+      "loss": 4.132,
+      "step": 3910
+    },
+    {
+      "epoch": 0.03911,
+      "grad_norm": 0.960086352721459,
+      "learning_rate": 0.003,
+      "loss": 4.1206,
+      "step": 3911
+    },
+    {
+      "epoch": 0.03912,
+      "grad_norm": 0.9317199647764666,
+      "learning_rate": 0.003,
+      "loss": 4.116,
+      "step": 3912
+    },
+    {
+      "epoch": 0.03913,
+      "grad_norm": 0.8946789663336782,
+      "learning_rate": 0.003,
+      "loss": 4.0906,
+      "step": 3913
+    },
+    {
+      "epoch": 0.03914,
+      "grad_norm": 0.8487878737890102,
+      "learning_rate": 0.003,
+      "loss": 4.1249,
+      "step": 3914
+    },
+    {
+      "epoch": 0.03915,
+      "grad_norm": 0.8668410486160162,
+      "learning_rate": 0.003,
+      "loss": 4.0957,
+      "step": 3915
+    },
+    {
+      "epoch": 0.03916,
+      "grad_norm": 0.8897335359039827,
+      "learning_rate": 0.003,
+      "loss": 4.126,
+      "step": 3916
+    },
+    {
+      "epoch": 0.03917,
+      "grad_norm": 1.0123681906019413,
+      "learning_rate": 0.003,
+      "loss": 4.1215,
+      "step": 3917
+    },
+    {
+      "epoch": 0.03918,
+      "grad_norm": 1.0602217244059164,
+      "learning_rate": 0.003,
+      "loss": 4.1106,
+      "step": 3918
+    },
+    {
+      "epoch": 0.03919,
+      "grad_norm": 1.02799372222634,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 3919
+    },
+    {
+      "epoch": 0.0392,
+      "grad_norm": 0.7728333569294379,
+      "learning_rate": 0.003,
+      "loss": 4.0936,
+      "step": 3920
+    },
+    {
+      "epoch": 0.03921,
+      "grad_norm": 0.7158476135413256,
+      "learning_rate": 0.003,
+      "loss": 4.0882,
+      "step": 3921
+    },
+    {
+      "epoch": 0.03922,
+      "grad_norm": 0.8125567354684526,
+      "learning_rate": 0.003,
+      "loss": 4.1303,
+      "step": 3922
+    },
+    {
+      "epoch": 0.03923,
+      "grad_norm": 0.9863034003179841,
+      "learning_rate": 0.003,
+      "loss": 4.0868,
+      "step": 3923
+    },
+    {
+      "epoch": 0.03924,
+      "grad_norm": 1.0668800143383192,
+      "learning_rate": 0.003,
+      "loss": 4.0999,
+      "step": 3924
+    },
+    {
+      "epoch": 0.03925,
+      "grad_norm": 0.9458022068309033,
+      "learning_rate": 0.003,
+      "loss": 4.1015,
+      "step": 3925
+    },
+    {
+      "epoch": 0.03926,
+      "grad_norm": 0.913203888262044,
+      "learning_rate": 0.003,
+      "loss": 4.0937,
+      "step": 3926
+    },
+    {
+      "epoch": 0.03927,
+      "grad_norm": 0.8813175388341771,
+      "learning_rate": 0.003,
+      "loss": 4.1061,
+      "step": 3927
+    },
+    {
+      "epoch": 0.03928,
+      "grad_norm": 0.8794380772976899,
+      "learning_rate": 0.003,
+      "loss": 4.1203,
+      "step": 3928
+    },
+    {
+      "epoch": 0.03929,
+      "grad_norm": 1.0579124240791549,
+      "learning_rate": 0.003,
+      "loss": 4.1352,
+      "step": 3929
+    },
+    {
+      "epoch": 0.0393,
+      "grad_norm": 1.1597780940734346,
+      "learning_rate": 0.003,
+      "loss": 4.1258,
+      "step": 3930
+    },
+    {
+      "epoch": 0.03931,
+      "grad_norm": 0.7081278586289745,
+      "learning_rate": 0.003,
+      "loss": 4.1169,
+      "step": 3931
+    },
+    {
+      "epoch": 0.03932,
+      "grad_norm": 0.7809455628695875,
+      "learning_rate": 0.003,
+      "loss": 4.1175,
+      "step": 3932
+    },
+    {
+      "epoch": 0.03933,
+      "grad_norm": 0.9272931464921295,
+      "learning_rate": 0.003,
+      "loss": 4.1094,
+      "step": 3933
+    },
+    {
+      "epoch": 0.03934,
+      "grad_norm": 0.849076414415576,
+      "learning_rate": 0.003,
+      "loss": 4.0983,
+      "step": 3934
+    },
+    {
+      "epoch": 0.03935,
+      "grad_norm": 0.7972121383758832,
+      "learning_rate": 0.003,
+      "loss": 4.0957,
+      "step": 3935
+    },
+    {
+      "epoch": 0.03936,
+      "grad_norm": 0.736286380065545,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 3936
+    },
+    {
+      "epoch": 0.03937,
+      "grad_norm": 0.7969361690394817,
+      "learning_rate": 0.003,
+      "loss": 4.0919,
+      "step": 3937
+    },
+    {
+      "epoch": 0.03938,
+      "grad_norm": 0.7648853318696774,
+      "learning_rate": 0.003,
+      "loss": 4.0959,
+      "step": 3938
+    },
+    {
+      "epoch": 0.03939,
+      "grad_norm": 0.793082890734816,
+      "learning_rate": 0.003,
+      "loss": 4.0729,
+      "step": 3939
+    },
+    {
+      "epoch": 0.0394,
+      "grad_norm": 0.9421729446698976,
+      "learning_rate": 0.003,
+      "loss": 4.1093,
+      "step": 3940
+    },
+    {
+      "epoch": 0.03941,
+      "grad_norm": 0.9532085204003842,
+      "learning_rate": 0.003,
+      "loss": 4.1021,
+      "step": 3941
+    },
+    {
+      "epoch": 0.03942,
+      "grad_norm": 0.9597084811820548,
+      "learning_rate": 0.003,
+      "loss": 4.1174,
+      "step": 3942
+    },
+    {
+      "epoch": 0.03943,
+      "grad_norm": 0.9484931289816191,
+      "learning_rate": 0.003,
+      "loss": 4.1064,
+      "step": 3943
+    },
+    {
+      "epoch": 0.03944,
+      "grad_norm": 0.9294543546840306,
+      "learning_rate": 0.003,
+      "loss": 4.121,
+      "step": 3944
+    },
+    {
+      "epoch": 0.03945,
+      "grad_norm": 0.8988004519802858,
+      "learning_rate": 0.003,
+      "loss": 4.0913,
+      "step": 3945
+    },
+    {
+      "epoch": 0.03946,
+      "grad_norm": 1.0598680988385802,
+      "learning_rate": 0.003,
+      "loss": 4.1233,
+      "step": 3946
+    },
+    {
+      "epoch": 0.03947,
+      "grad_norm": 0.8720535328674782,
+      "learning_rate": 0.003,
+      "loss": 4.1206,
+      "step": 3947
+    },
+    {
+      "epoch": 0.03948,
+      "grad_norm": 0.9223454341748609,
+      "learning_rate": 0.003,
+      "loss": 4.1371,
+      "step": 3948
+    },
+    {
+      "epoch": 0.03949,
+      "grad_norm": 0.8915715222436728,
+      "learning_rate": 0.003,
+      "loss": 4.0824,
+      "step": 3949
+    },
+    {
+      "epoch": 0.0395,
+      "grad_norm": 0.9448936409245293,
+      "learning_rate": 0.003,
+      "loss": 4.1126,
+      "step": 3950
+    },
+    {
+      "epoch": 0.03951,
+      "grad_norm": 1.0385370830415304,
+      "learning_rate": 0.003,
+      "loss": 4.1462,
+      "step": 3951
+    },
+    {
+      "epoch": 0.03952,
+      "grad_norm": 0.9030178362877832,
+      "learning_rate": 0.003,
+      "loss": 4.1202,
+      "step": 3952
+    },
+    {
+      "epoch": 0.03953,
+      "grad_norm": 0.8709314822989492,
+      "learning_rate": 0.003,
+      "loss": 4.1006,
+      "step": 3953
+    },
+    {
+      "epoch": 0.03954,
+      "grad_norm": 0.6813347239160019,
+      "learning_rate": 0.003,
+      "loss": 4.095,
+      "step": 3954
+    },
+    {
+      "epoch": 0.03955,
+      "grad_norm": 0.5792685005630919,
+      "learning_rate": 0.003,
+      "loss": 4.0936,
+      "step": 3955
+    },
+    {
+      "epoch": 0.03956,
+      "grad_norm": 0.643092840993703,
+      "learning_rate": 0.003,
+      "loss": 4.0958,
+      "step": 3956
+    },
+    {
+      "epoch": 0.03957,
+      "grad_norm": 0.5958384751343148,
+      "learning_rate": 0.003,
+      "loss": 4.1,
+      "step": 3957
+    },
+    {
+      "epoch": 0.03958,
+      "grad_norm": 0.7216735106401475,
+      "learning_rate": 0.003,
+      "loss": 4.077,
+      "step": 3958
+    },
+    {
+      "epoch": 0.03959,
+      "grad_norm": 1.0447079173416385,
+      "learning_rate": 0.003,
+      "loss": 4.0851,
+      "step": 3959
+    },
+    {
+      "epoch": 0.0396,
+      "grad_norm": 1.390251410216987,
+      "learning_rate": 0.003,
+      "loss": 4.1242,
+      "step": 3960
+    },
+    {
+      "epoch": 0.03961,
+      "grad_norm": 0.6008725960965626,
+      "learning_rate": 0.003,
+      "loss": 4.1009,
+      "step": 3961
+    },
+    {
+      "epoch": 0.03962,
+      "grad_norm": 1.007662011162288,
+      "learning_rate": 0.003,
+      "loss": 4.1212,
+      "step": 3962
+    },
+    {
+      "epoch": 0.03963,
+      "grad_norm": 1.1370046987648283,
+      "learning_rate": 0.003,
+      "loss": 4.1122,
+      "step": 3963
+    },
+    {
+      "epoch": 0.03964,
+      "grad_norm": 0.747283204388864,
+      "learning_rate": 0.003,
+      "loss": 4.0999,
+      "step": 3964
+    },
+    {
+      "epoch": 0.03965,
+      "grad_norm": 0.8660291958106119,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 3965
+    },
+    {
+      "epoch": 0.03966,
+      "grad_norm": 0.8173353175111122,
+      "learning_rate": 0.003,
+      "loss": 4.1372,
+      "step": 3966
+    },
+    {
+      "epoch": 0.03967,
+      "grad_norm": 0.8293952582170112,
+      "learning_rate": 0.003,
+      "loss": 4.0959,
+      "step": 3967
+    },
+    {
+      "epoch": 0.03968,
+      "grad_norm": 0.7178876491150251,
+      "learning_rate": 0.003,
+      "loss": 4.1208,
+      "step": 3968
+    },
+    {
+      "epoch": 0.03969,
+      "grad_norm": 0.6761246373413493,
+      "learning_rate": 0.003,
+      "loss": 4.0997,
+      "step": 3969
+    },
+    {
+      "epoch": 0.0397,
+      "grad_norm": 0.781718933884442,
+      "learning_rate": 0.003,
+      "loss": 4.1046,
+      "step": 3970
+    },
+    {
+      "epoch": 0.03971,
+      "grad_norm": 0.8449284031201099,
+      "learning_rate": 0.003,
+      "loss": 4.0832,
+      "step": 3971
+    },
+    {
+      "epoch": 0.03972,
+      "grad_norm": 0.7027976770057135,
+      "learning_rate": 0.003,
+      "loss": 4.1,
+      "step": 3972
+    },
+    {
+      "epoch": 0.03973,
+      "grad_norm": 0.7615706188874817,
+      "learning_rate": 0.003,
+      "loss": 4.1084,
+      "step": 3973
+    },
+    {
+      "epoch": 0.03974,
+      "grad_norm": 0.9198349150433933,
+      "learning_rate": 0.003,
+      "loss": 4.1094,
+      "step": 3974
+    },
+    {
+      "epoch": 0.03975,
+      "grad_norm": 0.9827265823761052,
+      "learning_rate": 0.003,
+      "loss": 4.1023,
+      "step": 3975
+    },
+    {
+      "epoch": 0.03976,
+      "grad_norm": 0.9713747801072962,
+      "learning_rate": 0.003,
+      "loss": 4.135,
+      "step": 3976
+    },
+    {
+      "epoch": 0.03977,
+      "grad_norm": 0.9509110934532634,
+      "learning_rate": 0.003,
+      "loss": 4.122,
+      "step": 3977
+    },
+    {
+      "epoch": 0.03978,
+      "grad_norm": 0.8340186563863297,
+      "learning_rate": 0.003,
+      "loss": 4.0857,
+      "step": 3978
+    },
+    {
+      "epoch": 0.03979,
+      "grad_norm": 0.8587622003381145,
+      "learning_rate": 0.003,
+      "loss": 4.1073,
+      "step": 3979
+    },
+    {
+      "epoch": 0.0398,
+      "grad_norm": 0.9900201402082706,
+      "learning_rate": 0.003,
+      "loss": 4.1077,
+      "step": 3980
+    },
+    {
+      "epoch": 0.03981,
+      "grad_norm": 0.8963808643427871,
+      "learning_rate": 0.003,
+      "loss": 4.0968,
+      "step": 3981
+    },
+    {
+      "epoch": 0.03982,
+      "grad_norm": 0.8219983933371988,
+      "learning_rate": 0.003,
+      "loss": 4.08,
+      "step": 3982
+    },
+    {
+      "epoch": 0.03983,
+      "grad_norm": 0.8040656765435304,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 3983
+    },
+    {
+      "epoch": 0.03984,
+      "grad_norm": 0.7325063244064047,
+      "learning_rate": 0.003,
+      "loss": 4.1049,
+      "step": 3984
+    },
+    {
+      "epoch": 0.03985,
+      "grad_norm": 0.883058640101833,
+      "learning_rate": 0.003,
+      "loss": 4.1043,
+      "step": 3985
+    },
+    {
+      "epoch": 0.03986,
+      "grad_norm": 1.0904689835883374,
+      "learning_rate": 0.003,
+      "loss": 4.1096,
+      "step": 3986
+    },
+    {
+      "epoch": 0.03987,
+      "grad_norm": 1.0592742752953161,
+      "learning_rate": 0.003,
+      "loss": 4.1145,
+      "step": 3987
+    },
+    {
+      "epoch": 0.03988,
+      "grad_norm": 0.9215756713392262,
+      "learning_rate": 0.003,
+      "loss": 4.1143,
+      "step": 3988
+    },
+    {
+      "epoch": 0.03989,
+      "grad_norm": 0.9308579277666478,
+      "learning_rate": 0.003,
+      "loss": 4.1057,
+      "step": 3989
+    },
+    {
+      "epoch": 0.0399,
+      "grad_norm": 0.6893991687243239,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 3990
+    },
+    {
+      "epoch": 0.03991,
+      "grad_norm": 0.6626762508427944,
+      "learning_rate": 0.003,
+      "loss": 4.1133,
+      "step": 3991
+    },
+    {
+      "epoch": 0.03992,
+      "grad_norm": 0.6277503646624452,
+      "learning_rate": 0.003,
+      "loss": 4.0926,
+      "step": 3992
+    },
+    {
+      "epoch": 0.03993,
+      "grad_norm": 0.5389226959273035,
+      "learning_rate": 0.003,
+      "loss": 4.0853,
+      "step": 3993
+    },
+    {
+      "epoch": 0.03994,
+      "grad_norm": 0.5575784523742429,
+      "learning_rate": 0.003,
+      "loss": 4.0842,
+      "step": 3994
+    },
+    {
+      "epoch": 0.03995,
+      "grad_norm": 0.5740995198289247,
+      "learning_rate": 0.003,
+      "loss": 4.0697,
+      "step": 3995
+    },
+    {
+      "epoch": 0.03996,
+      "grad_norm": 0.6506767895381821,
+      "learning_rate": 0.003,
+      "loss": 4.1048,
+      "step": 3996
+    },
+    {
+      "epoch": 0.03997,
+      "grad_norm": 0.8018658682841067,
+      "learning_rate": 0.003,
+      "loss": 4.1071,
+      "step": 3997
+    },
+    {
+      "epoch": 0.03998,
+      "grad_norm": 1.1165769471951579,
+      "learning_rate": 0.003,
+      "loss": 4.1178,
+      "step": 3998
+    },
+    {
+      "epoch": 0.03999,
+      "grad_norm": 0.9183179386537218,
+      "learning_rate": 0.003,
+      "loss": 4.1204,
+      "step": 3999
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 0.7930924507289704,
+      "learning_rate": 0.003,
+      "loss": 4.118,
+      "step": 4000
+    },
+    {
+      "epoch": 0.04001,
+      "grad_norm": 0.7997730049089519,
+      "learning_rate": 0.003,
+      "loss": 4.114,
+      "step": 4001
+    },
+    {
+      "epoch": 0.04002,
+      "grad_norm": 0.9222776505941644,
+      "learning_rate": 0.003,
+      "loss": 4.0909,
+      "step": 4002
+    },
+    {
+      "epoch": 0.04003,
+      "grad_norm": 1.0818631603686821,
+      "learning_rate": 0.003,
+      "loss": 4.1176,
+      "step": 4003
+    },
+    {
+      "epoch": 0.04004,
+      "grad_norm": 0.9791417920974259,
+      "learning_rate": 0.003,
+      "loss": 4.1202,
+      "step": 4004
+    },
+    {
+      "epoch": 0.04005,
+      "grad_norm": 0.9808789569533176,
+      "learning_rate": 0.003,
+      "loss": 4.131,
+      "step": 4005
+    },
+    {
+      "epoch": 0.04006,
+      "grad_norm": 0.8961052673893415,
+      "learning_rate": 0.003,
+      "loss": 4.0864,
+      "step": 4006
+    },
+    {
+      "epoch": 0.04007,
+      "grad_norm": 0.8297969899387416,
+      "learning_rate": 0.003,
+      "loss": 4.1092,
+      "step": 4007
+    },
+    {
+      "epoch": 0.04008,
+      "grad_norm": 0.936733303052199,
+      "learning_rate": 0.003,
+      "loss": 4.0885,
+      "step": 4008
+    },
+    {
+      "epoch": 0.04009,
+      "grad_norm": 0.9644457814855122,
+      "learning_rate": 0.003,
+      "loss": 4.0918,
+      "step": 4009
+    },
+    {
+      "epoch": 0.0401,
+      "grad_norm": 1.013041858960856,
+      "learning_rate": 0.003,
+      "loss": 4.1123,
+      "step": 4010
+    },
+    {
+      "epoch": 0.04011,
+      "grad_norm": 1.0633452177910276,
+      "learning_rate": 0.003,
+      "loss": 4.123,
+      "step": 4011
+    },
+    {
+      "epoch": 0.04012,
+      "grad_norm": 1.0649433557962202,
+      "learning_rate": 0.003,
+      "loss": 4.1157,
+      "step": 4012
+    },
+    {
+      "epoch": 0.04013,
+      "grad_norm": 0.7933050292506824,
+      "learning_rate": 0.003,
+      "loss": 4.1084,
+      "step": 4013
+    },
+    {
+      "epoch": 0.04014,
+      "grad_norm": 0.7879046370570779,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 4014
+    },
+    {
+      "epoch": 0.04015,
+      "grad_norm": 0.8241932652214115,
+      "learning_rate": 0.003,
+      "loss": 4.0718,
+      "step": 4015
+    },
+    {
+      "epoch": 0.04016,
+      "grad_norm": 0.8658550465105778,
+      "learning_rate": 0.003,
+      "loss": 4.0914,
+      "step": 4016
+    },
+    {
+      "epoch": 0.04017,
+      "grad_norm": 0.9228756535179865,
+      "learning_rate": 0.003,
+      "loss": 4.1344,
+      "step": 4017
+    },
+    {
+      "epoch": 0.04018,
+      "grad_norm": 0.9531173484679942,
+      "learning_rate": 0.003,
+      "loss": 4.1019,
+      "step": 4018
+    },
+    {
+      "epoch": 0.04019,
+      "grad_norm": 1.0866964724732295,
+      "learning_rate": 0.003,
+      "loss": 4.0876,
+      "step": 4019
+    },
+    {
+      "epoch": 0.0402,
+      "grad_norm": 1.0577908225334638,
+      "learning_rate": 0.003,
+      "loss": 4.1206,
+      "step": 4020
+    },
+    {
+      "epoch": 0.04021,
+      "grad_norm": 1.0725239975062906,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 4021
+    },
+    {
+      "epoch": 0.04022,
+      "grad_norm": 1.0475988978108968,
+      "learning_rate": 0.003,
+      "loss": 4.0915,
+      "step": 4022
+    },
+    {
+      "epoch": 0.04023,
+      "grad_norm": 1.065198125983714,
+      "learning_rate": 0.003,
+      "loss": 4.1016,
+      "step": 4023
+    },
+    {
+      "epoch": 0.04024,
+      "grad_norm": 0.9510719367654007,
+      "learning_rate": 0.003,
+      "loss": 4.0848,
+      "step": 4024
+    },
+    {
+      "epoch": 0.04025,
+      "grad_norm": 0.9965721786284437,
+      "learning_rate": 0.003,
+      "loss": 4.0934,
+      "step": 4025
+    },
+    {
+      "epoch": 0.04026,
+      "grad_norm": 1.1597169306225377,
+      "learning_rate": 0.003,
+      "loss": 4.1247,
+      "step": 4026
+    },
+    {
+      "epoch": 0.04027,
+      "grad_norm": 0.8115276383463242,
+      "learning_rate": 0.003,
+      "loss": 4.118,
+      "step": 4027
+    },
+    {
+      "epoch": 0.04028,
+      "grad_norm": 0.7507067496165764,
+      "learning_rate": 0.003,
+      "loss": 4.0962,
+      "step": 4028
+    },
+    {
+      "epoch": 0.04029,
+      "grad_norm": 0.8043593457637668,
+      "learning_rate": 0.003,
+      "loss": 4.1237,
+      "step": 4029
+    },
+    {
+      "epoch": 0.0403,
+      "grad_norm": 0.8475092967533915,
+      "learning_rate": 0.003,
+      "loss": 4.0785,
+      "step": 4030
+    },
+    {
+      "epoch": 0.04031,
+      "grad_norm": 0.7950265087708841,
+      "learning_rate": 0.003,
+      "loss": 4.0908,
+      "step": 4031
+    },
+    {
+      "epoch": 0.04032,
+      "grad_norm": 0.7929178533450967,
+      "learning_rate": 0.003,
+      "loss": 4.1173,
+      "step": 4032
+    },
+    {
+      "epoch": 0.04033,
+      "grad_norm": 0.9419776254998102,
+      "learning_rate": 0.003,
+      "loss": 4.1137,
+      "step": 4033
+    },
+    {
+      "epoch": 0.04034,
+      "grad_norm": 1.0174202023914847,
+      "learning_rate": 0.003,
+      "loss": 4.0873,
+      "step": 4034
+    },
+    {
+      "epoch": 0.04035,
+      "grad_norm": 1.0008080685810283,
+      "learning_rate": 0.003,
+      "loss": 4.1085,
+      "step": 4035
+    },
+    {
+      "epoch": 0.04036,
+      "grad_norm": 0.9875885643657349,
+      "learning_rate": 0.003,
+      "loss": 4.1267,
+      "step": 4036
+    },
+    {
+      "epoch": 0.04037,
+      "grad_norm": 1.035750371651436,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 4037
+    },
+    {
+      "epoch": 0.04038,
+      "grad_norm": 1.089114000312345,
+      "learning_rate": 0.003,
+      "loss": 4.1029,
+      "step": 4038
+    },
+    {
+      "epoch": 0.04039,
+      "grad_norm": 0.7787711745520574,
+      "learning_rate": 0.003,
+      "loss": 4.093,
+      "step": 4039
+    },
+    {
+      "epoch": 0.0404,
+      "grad_norm": 0.8304131011478352,
+      "learning_rate": 0.003,
+      "loss": 4.0899,
+      "step": 4040
+    },
+    {
+      "epoch": 0.04041,
+      "grad_norm": 0.8845677735505116,
+      "learning_rate": 0.003,
+      "loss": 4.0867,
+      "step": 4041
+    },
+    {
+      "epoch": 0.04042,
+      "grad_norm": 1.0056077779577761,
+      "learning_rate": 0.003,
+      "loss": 4.1323,
+      "step": 4042
+    },
+    {
+      "epoch": 0.04043,
+      "grad_norm": 1.303684048760548,
+      "learning_rate": 0.003,
+      "loss": 4.1299,
+      "step": 4043
+    },
+    {
+      "epoch": 0.04044,
+      "grad_norm": 0.6804116358869476,
+      "learning_rate": 0.003,
+      "loss": 4.1091,
+      "step": 4044
+    },
+    {
+      "epoch": 0.04045,
+      "grad_norm": 0.6777286555779822,
+      "learning_rate": 0.003,
+      "loss": 4.0524,
+      "step": 4045
+    },
+    {
+      "epoch": 0.04046,
+      "grad_norm": 0.7168976521438496,
+      "learning_rate": 0.003,
+      "loss": 4.1003,
+      "step": 4046
+    },
+    {
+      "epoch": 0.04047,
+      "grad_norm": 0.8016076938020208,
+      "learning_rate": 0.003,
+      "loss": 4.0921,
+      "step": 4047
+    },
+    {
+      "epoch": 0.04048,
+      "grad_norm": 0.8003360249896011,
+      "learning_rate": 0.003,
+      "loss": 4.0835,
+      "step": 4048
+    },
+    {
+      "epoch": 0.04049,
+      "grad_norm": 0.7905185716828504,
+      "learning_rate": 0.003,
+      "loss": 4.1233,
+      "step": 4049
+    },
+    {
+      "epoch": 0.0405,
+      "grad_norm": 0.7170296620649927,
+      "learning_rate": 0.003,
+      "loss": 4.1172,
+      "step": 4050
+    },
+    {
+      "epoch": 0.04051,
+      "grad_norm": 0.8022124254069631,
+      "learning_rate": 0.003,
+      "loss": 4.0999,
+      "step": 4051
+    },
+    {
+      "epoch": 0.04052,
+      "grad_norm": 0.9348016278831669,
+      "learning_rate": 0.003,
+      "loss": 4.11,
+      "step": 4052
+    },
+    {
+      "epoch": 0.04053,
+      "grad_norm": 0.8618520313571836,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 4053
+    },
+    {
+      "epoch": 0.04054,
+      "grad_norm": 0.8261211062660251,
+      "learning_rate": 0.003,
+      "loss": 4.0768,
+      "step": 4054
+    },
+    {
+      "epoch": 0.04055,
+      "grad_norm": 0.6845606042201053,
+      "learning_rate": 0.003,
+      "loss": 4.0666,
+      "step": 4055
+    },
+    {
+      "epoch": 0.04056,
+      "grad_norm": 0.6070419310929447,
+      "learning_rate": 0.003,
+      "loss": 4.09,
+      "step": 4056
+    },
+    {
+      "epoch": 0.04057,
+      "grad_norm": 0.5685943884831188,
+      "learning_rate": 0.003,
+      "loss": 4.0894,
+      "step": 4057
+    },
+    {
+      "epoch": 0.04058,
+      "grad_norm": 0.6709504673340433,
+      "learning_rate": 0.003,
+      "loss": 4.0639,
+      "step": 4058
+    },
+    {
+      "epoch": 0.04059,
+      "grad_norm": 0.8379919064931752,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 4059
+    },
+    {
+      "epoch": 0.0406,
+      "grad_norm": 0.8620293391959047,
+      "learning_rate": 0.003,
+      "loss": 4.1127,
+      "step": 4060
+    },
+    {
+      "epoch": 0.04061,
+      "grad_norm": 0.7852271201009998,
+      "learning_rate": 0.003,
+      "loss": 4.0943,
+      "step": 4061
+    },
+    {
+      "epoch": 0.04062,
+      "grad_norm": 0.8559296475729065,
+      "learning_rate": 0.003,
+      "loss": 4.0773,
+      "step": 4062
+    },
+    {
+      "epoch": 0.04063,
+      "grad_norm": 0.8779279835196838,
+      "learning_rate": 0.003,
+      "loss": 4.0841,
+      "step": 4063
+    },
+    {
+      "epoch": 0.04064,
+      "grad_norm": 0.8897248490868346,
+      "learning_rate": 0.003,
+      "loss": 4.099,
+      "step": 4064
+    },
+    {
+      "epoch": 0.04065,
+      "grad_norm": 0.9175138271766847,
+      "learning_rate": 0.003,
+      "loss": 4.1364,
+      "step": 4065
+    },
+    {
+      "epoch": 0.04066,
+      "grad_norm": 0.9362345350031807,
+      "learning_rate": 0.003,
+      "loss": 4.0996,
+      "step": 4066
+    },
+    {
+      "epoch": 0.04067,
+      "grad_norm": 1.1545536887257815,
+      "learning_rate": 0.003,
+      "loss": 4.1086,
+      "step": 4067
+    },
+    {
+      "epoch": 0.04068,
+      "grad_norm": 1.0240578066067638,
+      "learning_rate": 0.003,
+      "loss": 4.1132,
+      "step": 4068
+    },
+    {
+      "epoch": 0.04069,
+      "grad_norm": 0.941652681317373,
+      "learning_rate": 0.003,
+      "loss": 4.1035,
+      "step": 4069
+    },
+    {
+      "epoch": 0.0407,
+      "grad_norm": 0.9346763708267345,
+      "learning_rate": 0.003,
+      "loss": 4.1051,
+      "step": 4070
+    },
+    {
+      "epoch": 0.04071,
+      "grad_norm": 0.9599025044832693,
+      "learning_rate": 0.003,
+      "loss": 4.0759,
+      "step": 4071
+    },
+    {
+      "epoch": 0.04072,
+      "grad_norm": 0.8612213977045744,
+      "learning_rate": 0.003,
+      "loss": 4.0953,
+      "step": 4072
+    },
+    {
+      "epoch": 0.04073,
+      "grad_norm": 0.8763585891787764,
+      "learning_rate": 0.003,
+      "loss": 4.0808,
+      "step": 4073
+    },
+    {
+      "epoch": 0.04074,
+      "grad_norm": 0.8539311608465675,
+      "learning_rate": 0.003,
+      "loss": 4.1083,
+      "step": 4074
+    },
+    {
+      "epoch": 0.04075,
+      "grad_norm": 0.8369021460447633,
+      "learning_rate": 0.003,
+      "loss": 4.1042,
+      "step": 4075
+    },
+    {
+      "epoch": 0.04076,
+      "grad_norm": 0.8578020250558176,
+      "learning_rate": 0.003,
+      "loss": 4.0979,
+      "step": 4076
+    },
+    {
+      "epoch": 0.04077,
+      "grad_norm": 0.85497575989368,
+      "learning_rate": 0.003,
+      "loss": 4.1111,
+      "step": 4077
+    },
+    {
+      "epoch": 0.04078,
+      "grad_norm": 0.9407927776815804,
+      "learning_rate": 0.003,
+      "loss": 4.0876,
+      "step": 4078
+    },
+    {
+      "epoch": 0.04079,
+      "grad_norm": 1.05128762652415,
+      "learning_rate": 0.003,
+      "loss": 4.1036,
+      "step": 4079
+    },
+    {
+      "epoch": 0.0408,
+      "grad_norm": 1.1019406938246195,
+      "learning_rate": 0.003,
+      "loss": 4.0979,
+      "step": 4080
+    },
+    {
+      "epoch": 0.04081,
+      "grad_norm": 0.9496061799293344,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 4081
+    },
+    {
+      "epoch": 0.04082,
+      "grad_norm": 0.8481874389103903,
+      "learning_rate": 0.003,
+      "loss": 4.1091,
+      "step": 4082
+    },
+    {
+      "epoch": 0.04083,
+      "grad_norm": 0.7274191448577992,
+      "learning_rate": 0.003,
+      "loss": 4.082,
+      "step": 4083
+    },
+    {
+      "epoch": 0.04084,
+      "grad_norm": 0.7904195126005293,
+      "learning_rate": 0.003,
+      "loss": 4.1207,
+      "step": 4084
+    },
+    {
+      "epoch": 0.04085,
+      "grad_norm": 1.0202097759202264,
+      "learning_rate": 0.003,
+      "loss": 4.1066,
+      "step": 4085
+    },
+    {
+      "epoch": 0.04086,
+      "grad_norm": 1.201486915510698,
+      "learning_rate": 0.003,
+      "loss": 4.1299,
+      "step": 4086
+    },
+    {
+      "epoch": 0.04087,
+      "grad_norm": 0.868844436475151,
+      "learning_rate": 0.003,
+      "loss": 4.1076,
+      "step": 4087
+    },
+    {
+      "epoch": 0.04088,
+      "grad_norm": 0.9444985463912003,
+      "learning_rate": 0.003,
+      "loss": 4.089,
+      "step": 4088
+    },
+    {
+      "epoch": 0.04089,
+      "grad_norm": 0.9918074019046071,
+      "learning_rate": 0.003,
+      "loss": 4.0921,
+      "step": 4089
+    },
+    {
+      "epoch": 0.0409,
+      "grad_norm": 1.0955168255438645,
+      "learning_rate": 0.003,
+      "loss": 4.1134,
+      "step": 4090
+    },
+    {
+      "epoch": 0.04091,
+      "grad_norm": 1.063937537128444,
+      "learning_rate": 0.003,
+      "loss": 4.1242,
+      "step": 4091
+    },
+    {
+      "epoch": 0.04092,
+      "grad_norm": 0.9343133313137877,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 4092
+    },
+    {
+      "epoch": 0.04093,
+      "grad_norm": 0.8938252574219301,
+      "learning_rate": 0.003,
+      "loss": 4.106,
+      "step": 4093
+    },
+    {
+      "epoch": 0.04094,
+      "grad_norm": 0.8646025553563678,
+      "learning_rate": 0.003,
+      "loss": 4.0958,
+      "step": 4094
+    },
+    {
+      "epoch": 0.04095,
+      "grad_norm": 0.8849098800450139,
+      "learning_rate": 0.003,
+      "loss": 4.0708,
+      "step": 4095
+    },
+    {
+      "epoch": 0.04096,
+      "grad_norm": 0.8639434333725027,
+      "learning_rate": 0.003,
+      "loss": 4.0767,
+      "step": 4096
+    },
+    {
+      "epoch": 0.04097,
+      "grad_norm": 0.8248508344367464,
+      "learning_rate": 0.003,
+      "loss": 4.096,
+      "step": 4097
+    },
+    {
+      "epoch": 0.04098,
+      "grad_norm": 0.7799456648454658,
+      "learning_rate": 0.003,
+      "loss": 4.0966,
+      "step": 4098
+    },
+    {
+      "epoch": 0.04099,
+      "grad_norm": 0.8045819576448443,
+      "learning_rate": 0.003,
+      "loss": 4.1195,
+      "step": 4099
+    },
+    {
+      "epoch": 0.041,
+      "grad_norm": 0.8747001760069023,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 4100
+    },
+    {
+      "epoch": 0.04101,
+      "grad_norm": 0.8938516305008589,
+      "learning_rate": 0.003,
+      "loss": 4.0744,
+      "step": 4101
+    },
+    {
+      "epoch": 0.04102,
+      "grad_norm": 0.8240754586578543,
+      "learning_rate": 0.003,
+      "loss": 4.0914,
+      "step": 4102
+    },
+    {
+      "epoch": 0.04103,
+      "grad_norm": 0.7515676676481744,
+      "learning_rate": 0.003,
+      "loss": 4.1135,
+      "step": 4103
+    },
+    {
+      "epoch": 0.04104,
+      "grad_norm": 0.7588461451530453,
+      "learning_rate": 0.003,
+      "loss": 4.099,
+      "step": 4104
+    },
+    {
+      "epoch": 0.04105,
+      "grad_norm": 0.8416483809920972,
+      "learning_rate": 0.003,
+      "loss": 4.0658,
+      "step": 4105
+    },
+    {
+      "epoch": 0.04106,
+      "grad_norm": 0.9997989043605089,
+      "learning_rate": 0.003,
+      "loss": 4.1091,
+      "step": 4106
+    },
+    {
+      "epoch": 0.04107,
+      "grad_norm": 1.4519560189758212,
+      "learning_rate": 0.003,
+      "loss": 4.1167,
+      "step": 4107
+    },
+    {
+      "epoch": 0.04108,
+      "grad_norm": 0.6881032147408636,
+      "learning_rate": 0.003,
+      "loss": 4.0891,
+      "step": 4108
+    },
+    {
+      "epoch": 0.04109,
+      "grad_norm": 0.7741592484410211,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 4109
+    },
+    {
+      "epoch": 0.0411,
+      "grad_norm": 0.8818900570270763,
+      "learning_rate": 0.003,
+      "loss": 4.091,
+      "step": 4110
+    },
+    {
+      "epoch": 0.04111,
+      "grad_norm": 1.0723961097005286,
+      "learning_rate": 0.003,
+      "loss": 4.0794,
+      "step": 4111
+    },
+    {
+      "epoch": 0.04112,
+      "grad_norm": 1.0495234672278315,
+      "learning_rate": 0.003,
+      "loss": 4.0982,
+      "step": 4112
+    },
+    {
+      "epoch": 0.04113,
+      "grad_norm": 1.028878109333595,
+      "learning_rate": 0.003,
+      "loss": 4.1422,
+      "step": 4113
+    },
+    {
+      "epoch": 0.04114,
+      "grad_norm": 0.7814661056488829,
+      "learning_rate": 0.003,
+      "loss": 4.0787,
+      "step": 4114
+    },
+    {
+      "epoch": 0.04115,
+      "grad_norm": 0.7046355162923472,
+      "learning_rate": 0.003,
+      "loss": 4.089,
+      "step": 4115
+    },
+    {
+      "epoch": 0.04116,
+      "grad_norm": 0.7912332755348991,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 4116
+    },
+    {
+      "epoch": 0.04117,
+      "grad_norm": 0.8341278502744938,
+      "learning_rate": 0.003,
+      "loss": 4.0668,
+      "step": 4117
+    },
+    {
+      "epoch": 0.04118,
+      "grad_norm": 0.8875854653843481,
+      "learning_rate": 0.003,
+      "loss": 4.1278,
+      "step": 4118
+    },
+    {
+      "epoch": 0.04119,
+      "grad_norm": 0.8609577714531109,
+      "learning_rate": 0.003,
+      "loss": 4.1038,
+      "step": 4119
+    },
+    {
+      "epoch": 0.0412,
+      "grad_norm": 0.9318473319692638,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 4120
+    },
+    {
+      "epoch": 0.04121,
+      "grad_norm": 0.9493333365772441,
+      "learning_rate": 0.003,
+      "loss": 4.1233,
+      "step": 4121
+    },
+    {
+      "epoch": 0.04122,
+      "grad_norm": 0.9836587921676327,
+      "learning_rate": 0.003,
+      "loss": 4.1001,
+      "step": 4122
+    },
+    {
+      "epoch": 0.04123,
+      "grad_norm": 0.9314136846053894,
+      "learning_rate": 0.003,
+      "loss": 4.1073,
+      "step": 4123
+    },
+    {
+      "epoch": 0.04124,
+      "grad_norm": 0.8615395432982054,
+      "learning_rate": 0.003,
+      "loss": 4.0853,
+      "step": 4124
+    },
+    {
+      "epoch": 0.04125,
+      "grad_norm": 0.8303231719235581,
+      "learning_rate": 0.003,
+      "loss": 4.107,
+      "step": 4125
+    },
+    {
+      "epoch": 0.04126,
+      "grad_norm": 0.9351121203757022,
+      "learning_rate": 0.003,
+      "loss": 4.1031,
+      "step": 4126
+    },
+    {
+      "epoch": 0.04127,
+      "grad_norm": 0.9548721015062305,
+      "learning_rate": 0.003,
+      "loss": 4.0928,
+      "step": 4127
+    },
+    {
+      "epoch": 0.04128,
+      "grad_norm": 0.904134032279689,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 4128
+    },
+    {
+      "epoch": 0.04129,
+      "grad_norm": 1.0360809502468598,
+      "learning_rate": 0.003,
+      "loss": 4.1288,
+      "step": 4129
+    },
+    {
+      "epoch": 0.0413,
+      "grad_norm": 1.141478517660115,
+      "learning_rate": 0.003,
+      "loss": 4.0942,
+      "step": 4130
+    },
+    {
+      "epoch": 0.04131,
+      "grad_norm": 1.097730707750305,
+      "learning_rate": 0.003,
+      "loss": 4.1188,
+      "step": 4131
+    },
+    {
+      "epoch": 0.04132,
+      "grad_norm": 0.9015795328637733,
+      "learning_rate": 0.003,
+      "loss": 4.0805,
+      "step": 4132
+    },
+    {
+      "epoch": 0.04133,
+      "grad_norm": 0.9605681859330745,
+      "learning_rate": 0.003,
+      "loss": 4.1225,
+      "step": 4133
+    },
+    {
+      "epoch": 0.04134,
+      "grad_norm": 0.9723288613378216,
+      "learning_rate": 0.003,
+      "loss": 4.1152,
+      "step": 4134
+    },
+    {
+      "epoch": 0.04135,
+      "grad_norm": 0.9769907462898332,
+      "learning_rate": 0.003,
+      "loss": 4.0955,
+      "step": 4135
+    },
+    {
+      "epoch": 0.04136,
+      "grad_norm": 0.8696285037251947,
+      "learning_rate": 0.003,
+      "loss": 4.0888,
+      "step": 4136
+    },
+    {
+      "epoch": 0.04137,
+      "grad_norm": 0.8622412103402255,
+      "learning_rate": 0.003,
+      "loss": 4.1276,
+      "step": 4137
+    },
+    {
+      "epoch": 0.04138,
+      "grad_norm": 0.8940895987636069,
+      "learning_rate": 0.003,
+      "loss": 4.114,
+      "step": 4138
+    },
+    {
+      "epoch": 0.04139,
+      "grad_norm": 0.8558944236718854,
+      "learning_rate": 0.003,
+      "loss": 4.1079,
+      "step": 4139
+    },
+    {
+      "epoch": 0.0414,
+      "grad_norm": 0.7906160709298794,
+      "learning_rate": 0.003,
+      "loss": 4.1091,
+      "step": 4140
+    },
+    {
+      "epoch": 0.04141,
+      "grad_norm": 0.8043674825801352,
+      "learning_rate": 0.003,
+      "loss": 4.0851,
+      "step": 4141
+    },
+    {
+      "epoch": 0.04142,
+      "grad_norm": 0.8668589703050257,
+      "learning_rate": 0.003,
+      "loss": 4.1101,
+      "step": 4142
+    },
+    {
+      "epoch": 0.04143,
+      "grad_norm": 1.0143988611429429,
+      "learning_rate": 0.003,
+      "loss": 4.1009,
+      "step": 4143
+    },
+    {
+      "epoch": 0.04144,
+      "grad_norm": 1.1221724323882003,
+      "learning_rate": 0.003,
+      "loss": 4.105,
+      "step": 4144
+    },
+    {
+      "epoch": 0.04145,
+      "grad_norm": 0.9750842855026217,
+      "learning_rate": 0.003,
+      "loss": 4.0885,
+      "step": 4145
+    },
+    {
+      "epoch": 0.04146,
+      "grad_norm": 0.837063893029182,
+      "learning_rate": 0.003,
+      "loss": 4.1029,
+      "step": 4146
+    },
+    {
+      "epoch": 0.04147,
+      "grad_norm": 0.817279014947959,
+      "learning_rate": 0.003,
+      "loss": 4.1136,
+      "step": 4147
+    },
+    {
+      "epoch": 0.04148,
+      "grad_norm": 0.6652410842078591,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 4148
+    },
+    {
+      "epoch": 0.04149,
+      "grad_norm": 0.601931902805492,
+      "learning_rate": 0.003,
+      "loss": 4.0762,
+      "step": 4149
+    },
+    {
+      "epoch": 0.0415,
+      "grad_norm": 0.5744893147283616,
+      "learning_rate": 0.003,
+      "loss": 4.0783,
+      "step": 4150
+    },
+    {
+      "epoch": 0.04151,
+      "grad_norm": 0.5594756907939531,
+      "learning_rate": 0.003,
+      "loss": 4.0958,
+      "step": 4151
+    },
+    {
+      "epoch": 0.04152,
+      "grad_norm": 0.6696918347600734,
+      "learning_rate": 0.003,
+      "loss": 4.097,
+      "step": 4152
+    },
+    {
+      "epoch": 0.04153,
+      "grad_norm": 0.8337852170452664,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 4153
+    },
+    {
+      "epoch": 0.04154,
+      "grad_norm": 0.8486120723348882,
+      "learning_rate": 0.003,
+      "loss": 4.1015,
+      "step": 4154
+    },
+    {
+      "epoch": 0.04155,
+      "grad_norm": 0.697828069478384,
+      "learning_rate": 0.003,
+      "loss": 4.0726,
+      "step": 4155
+    },
+    {
+      "epoch": 0.04156,
+      "grad_norm": 0.4957422759820407,
+      "learning_rate": 0.003,
+      "loss": 4.0881,
+      "step": 4156
+    },
+    {
+      "epoch": 0.04157,
+      "grad_norm": 0.5563098539794213,
+      "learning_rate": 0.003,
+      "loss": 4.1032,
+      "step": 4157
+    },
+    {
+      "epoch": 0.04158,
+      "grad_norm": 0.6673553499632091,
+      "learning_rate": 0.003,
+      "loss": 4.1043,
+      "step": 4158
+    },
+    {
+      "epoch": 0.04159,
+      "grad_norm": 0.8290986224854208,
+      "learning_rate": 0.003,
+      "loss": 4.0802,
+      "step": 4159
+    },
+    {
+      "epoch": 0.0416,
+      "grad_norm": 0.9164477827441916,
+      "learning_rate": 0.003,
+      "loss": 4.0966,
+      "step": 4160
+    },
+    {
+      "epoch": 0.04161,
+      "grad_norm": 0.8138741601967407,
+      "learning_rate": 0.003,
+      "loss": 4.0993,
+      "step": 4161
+    },
+    {
+      "epoch": 0.04162,
+      "grad_norm": 0.7719431484259072,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 4162
+    },
+    {
+      "epoch": 0.04163,
+      "grad_norm": 0.7815652988597618,
+      "learning_rate": 0.003,
+      "loss": 4.0858,
+      "step": 4163
+    },
+    {
+      "epoch": 0.04164,
+      "grad_norm": 0.9136897661923532,
+      "learning_rate": 0.003,
+      "loss": 4.1081,
+      "step": 4164
+    },
+    {
+      "epoch": 0.04165,
+      "grad_norm": 1.0910864238229012,
+      "learning_rate": 0.003,
+      "loss": 4.089,
+      "step": 4165
+    },
+    {
+      "epoch": 0.04166,
+      "grad_norm": 1.0985022482979871,
+      "learning_rate": 0.003,
+      "loss": 4.0829,
+      "step": 4166
+    },
+    {
+      "epoch": 0.04167,
+      "grad_norm": 0.884758586994079,
+      "learning_rate": 0.003,
+      "loss": 4.1031,
+      "step": 4167
+    },
+    {
+      "epoch": 0.04168,
+      "grad_norm": 0.8375929677822369,
+      "learning_rate": 0.003,
+      "loss": 4.1005,
+      "step": 4168
+    },
+    {
+      "epoch": 0.04169,
+      "grad_norm": 0.8912187343234272,
+      "learning_rate": 0.003,
+      "loss": 4.1106,
+      "step": 4169
+    },
+    {
+      "epoch": 0.0417,
+      "grad_norm": 1.09510323581272,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 4170
+    },
+    {
+      "epoch": 0.04171,
+      "grad_norm": 0.9235321645280937,
+      "learning_rate": 0.003,
+      "loss": 4.0951,
+      "step": 4171
+    },
+    {
+      "epoch": 0.04172,
+      "grad_norm": 0.9054535762684148,
+      "learning_rate": 0.003,
+      "loss": 4.1142,
+      "step": 4172
+    },
+    {
+      "epoch": 0.04173,
+      "grad_norm": 0.9533816623318108,
+      "learning_rate": 0.003,
+      "loss": 4.1107,
+      "step": 4173
+    },
+    {
+      "epoch": 0.04174,
+      "grad_norm": 0.9693424734311672,
+      "learning_rate": 0.003,
+      "loss": 4.0809,
+      "step": 4174
+    },
+    {
+      "epoch": 0.04175,
+      "grad_norm": 0.9476037015111809,
+      "learning_rate": 0.003,
+      "loss": 4.1271,
+      "step": 4175
+    },
+    {
+      "epoch": 0.04176,
+      "grad_norm": 0.9584295419287631,
+      "learning_rate": 0.003,
+      "loss": 4.0947,
+      "step": 4176
+    },
+    {
+      "epoch": 0.04177,
+      "grad_norm": 0.9248605600298978,
+      "learning_rate": 0.003,
+      "loss": 4.072,
+      "step": 4177
+    },
+    {
+      "epoch": 0.04178,
+      "grad_norm": 1.0049448032304569,
+      "learning_rate": 0.003,
+      "loss": 4.1083,
+      "step": 4178
+    },
+    {
+      "epoch": 0.04179,
+      "grad_norm": 1.0827168186886107,
+      "learning_rate": 0.003,
+      "loss": 4.0888,
+      "step": 4179
+    },
+    {
+      "epoch": 0.0418,
+      "grad_norm": 0.8883360779651741,
+      "learning_rate": 0.003,
+      "loss": 4.098,
+      "step": 4180
+    },
+    {
+      "epoch": 0.04181,
+      "grad_norm": 1.0344215717913283,
+      "learning_rate": 0.003,
+      "loss": 4.1003,
+      "step": 4181
+    },
+    {
+      "epoch": 0.04182,
+      "grad_norm": 0.9869075813400712,
+      "learning_rate": 0.003,
+      "loss": 4.0833,
+      "step": 4182
+    },
+    {
+      "epoch": 0.04183,
+      "grad_norm": 0.8294921509305445,
+      "learning_rate": 0.003,
+      "loss": 4.0878,
+      "step": 4183
+    },
+    {
+      "epoch": 0.04184,
+      "grad_norm": 0.869514009963607,
+      "learning_rate": 0.003,
+      "loss": 4.1424,
+      "step": 4184
+    },
+    {
+      "epoch": 0.04185,
+      "grad_norm": 1.0166270234192272,
+      "learning_rate": 0.003,
+      "loss": 4.1279,
+      "step": 4185
+    },
+    {
+      "epoch": 0.04186,
+      "grad_norm": 0.9675533699418017,
+      "learning_rate": 0.003,
+      "loss": 4.0714,
+      "step": 4186
+    },
+    {
+      "epoch": 0.04187,
+      "grad_norm": 1.118617978668176,
+      "learning_rate": 0.003,
+      "loss": 4.1282,
+      "step": 4187
+    },
+    {
+      "epoch": 0.04188,
+      "grad_norm": 0.9597380404807206,
+      "learning_rate": 0.003,
+      "loss": 4.0878,
+      "step": 4188
+    },
+    {
+      "epoch": 0.04189,
+      "grad_norm": 0.8821463731911939,
+      "learning_rate": 0.003,
+      "loss": 4.1,
+      "step": 4189
+    },
+    {
+      "epoch": 0.0419,
+      "grad_norm": 0.8335574771570168,
+      "learning_rate": 0.003,
+      "loss": 4.097,
+      "step": 4190
+    },
+    {
+      "epoch": 0.04191,
+      "grad_norm": 0.960590386364999,
+      "learning_rate": 0.003,
+      "loss": 4.1012,
+      "step": 4191
+    },
+    {
+      "epoch": 0.04192,
+      "grad_norm": 1.1082087641443477,
+      "learning_rate": 0.003,
+      "loss": 4.0884,
+      "step": 4192
+    },
+    {
+      "epoch": 0.04193,
+      "grad_norm": 1.1014870968077684,
+      "learning_rate": 0.003,
+      "loss": 4.0724,
+      "step": 4193
+    },
+    {
+      "epoch": 0.04194,
+      "grad_norm": 1.004137077094656,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 4194
+    },
+    {
+      "epoch": 0.04195,
+      "grad_norm": 1.0060456499295114,
+      "learning_rate": 0.003,
+      "loss": 4.1185,
+      "step": 4195
+    },
+    {
+      "epoch": 0.04196,
+      "grad_norm": 0.9781412326562324,
+      "learning_rate": 0.003,
+      "loss": 4.0768,
+      "step": 4196
+    },
+    {
+      "epoch": 0.04197,
+      "grad_norm": 0.966982025696319,
+      "learning_rate": 0.003,
+      "loss": 4.0865,
+      "step": 4197
+    },
+    {
+      "epoch": 0.04198,
+      "grad_norm": 1.0321323425834754,
+      "learning_rate": 0.003,
+      "loss": 4.0848,
+      "step": 4198
+    },
+    {
+      "epoch": 0.04199,
+      "grad_norm": 0.9725763984900901,
+      "learning_rate": 0.003,
+      "loss": 4.1218,
+      "step": 4199
+    },
+    {
+      "epoch": 0.042,
+      "grad_norm": 0.9044032352315242,
+      "learning_rate": 0.003,
+      "loss": 4.0991,
+      "step": 4200
+    },
+    {
+      "epoch": 0.04201,
+      "grad_norm": 0.876685657514267,
+      "learning_rate": 0.003,
+      "loss": 4.08,
+      "step": 4201
+    },
+    {
+      "epoch": 0.04202,
+      "grad_norm": 1.0379948329277224,
+      "learning_rate": 0.003,
+      "loss": 4.1002,
+      "step": 4202
+    },
+    {
+      "epoch": 0.04203,
+      "grad_norm": 0.9407666140032972,
+      "learning_rate": 0.003,
+      "loss": 4.1169,
+      "step": 4203
+    },
+    {
+      "epoch": 0.04204,
+      "grad_norm": 0.9627655702680004,
+      "learning_rate": 0.003,
+      "loss": 4.085,
+      "step": 4204
+    },
+    {
+      "epoch": 0.04205,
+      "grad_norm": 0.879727149930489,
+      "learning_rate": 0.003,
+      "loss": 4.1074,
+      "step": 4205
+    },
+    {
+      "epoch": 0.04206,
+      "grad_norm": 0.7201424170354667,
+      "learning_rate": 0.003,
+      "loss": 4.098,
+      "step": 4206
+    },
+    {
+      "epoch": 0.04207,
+      "grad_norm": 0.8722237657873413,
+      "learning_rate": 0.003,
+      "loss": 4.0915,
+      "step": 4207
+    },
+    {
+      "epoch": 0.04208,
+      "grad_norm": 1.057153458391563,
+      "learning_rate": 0.003,
+      "loss": 4.1051,
+      "step": 4208
+    },
+    {
+      "epoch": 0.04209,
+      "grad_norm": 1.0856375627812824,
+      "learning_rate": 0.003,
+      "loss": 4.1123,
+      "step": 4209
+    },
+    {
+      "epoch": 0.0421,
+      "grad_norm": 0.8686311196866661,
+      "learning_rate": 0.003,
+      "loss": 4.0617,
+      "step": 4210
+    },
+    {
+      "epoch": 0.04211,
+      "grad_norm": 0.772423867231878,
+      "learning_rate": 0.003,
+      "loss": 4.0983,
+      "step": 4211
+    },
+    {
+      "epoch": 0.04212,
+      "grad_norm": 0.9206795604812895,
+      "learning_rate": 0.003,
+      "loss": 4.0918,
+      "step": 4212
+    },
+    {
+      "epoch": 0.04213,
+      "grad_norm": 1.024206681316176,
+      "learning_rate": 0.003,
+      "loss": 4.0789,
+      "step": 4213
+    },
+    {
+      "epoch": 0.04214,
+      "grad_norm": 0.9670322877487537,
+      "learning_rate": 0.003,
+      "loss": 4.0856,
+      "step": 4214
+    },
+    {
+      "epoch": 0.04215,
+      "grad_norm": 0.7494961355008928,
+      "learning_rate": 0.003,
+      "loss": 4.1001,
+      "step": 4215
+    },
+    {
+      "epoch": 0.04216,
+      "grad_norm": 0.765486635210703,
+      "learning_rate": 0.003,
+      "loss": 4.0645,
+      "step": 4216
+    },
+    {
+      "epoch": 0.04217,
+      "grad_norm": 0.7160617513742286,
+      "learning_rate": 0.003,
+      "loss": 4.0848,
+      "step": 4217
+    },
+    {
+      "epoch": 0.04218,
+      "grad_norm": 0.8033027325871259,
+      "learning_rate": 0.003,
+      "loss": 4.1174,
+      "step": 4218
+    },
+    {
+      "epoch": 0.04219,
+      "grad_norm": 0.8144707096133446,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 4219
+    },
+    {
+      "epoch": 0.0422,
+      "grad_norm": 0.8870053013997257,
+      "learning_rate": 0.003,
+      "loss": 4.1062,
+      "step": 4220
+    },
+    {
+      "epoch": 0.04221,
+      "grad_norm": 1.0572558853767193,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 4221
+    },
+    {
+      "epoch": 0.04222,
+      "grad_norm": 0.8601884630133385,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 4222
+    },
+    {
+      "epoch": 0.04223,
+      "grad_norm": 0.7192916764502633,
+      "learning_rate": 0.003,
+      "loss": 4.1063,
+      "step": 4223
+    },
+    {
+      "epoch": 0.04224,
+      "grad_norm": 0.7121512553768459,
+      "learning_rate": 0.003,
+      "loss": 4.1112,
+      "step": 4224
+    },
+    {
+      "epoch": 0.04225,
+      "grad_norm": 0.7021515831824932,
+      "learning_rate": 0.003,
+      "loss": 4.0944,
+      "step": 4225
+    },
+    {
+      "epoch": 0.04226,
+      "grad_norm": 0.8314637642734086,
+      "learning_rate": 0.003,
+      "loss": 4.0828,
+      "step": 4226
+    },
+    {
+      "epoch": 0.04227,
+      "grad_norm": 0.9670578496344213,
+      "learning_rate": 0.003,
+      "loss": 4.0877,
+      "step": 4227
+    },
+    {
+      "epoch": 0.04228,
+      "grad_norm": 1.049287879558258,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 4228
+    },
+    {
+      "epoch": 0.04229,
+      "grad_norm": 1.0992300949270042,
+      "learning_rate": 0.003,
+      "loss": 4.1078,
+      "step": 4229
+    },
+    {
+      "epoch": 0.0423,
+      "grad_norm": 0.900897411198522,
+      "learning_rate": 0.003,
+      "loss": 4.0917,
+      "step": 4230
+    },
+    {
+      "epoch": 0.04231,
+      "grad_norm": 0.9451671959275939,
+      "learning_rate": 0.003,
+      "loss": 4.0886,
+      "step": 4231
+    },
+    {
+      "epoch": 0.04232,
+      "grad_norm": 0.7956005679951864,
+      "learning_rate": 0.003,
+      "loss": 4.0993,
+      "step": 4232
+    },
+    {
+      "epoch": 0.04233,
+      "grad_norm": 0.8292582126895408,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 4233
+    },
+    {
+      "epoch": 0.04234,
+      "grad_norm": 0.8871903907197981,
+      "learning_rate": 0.003,
+      "loss": 4.0874,
+      "step": 4234
+    },
+    {
+      "epoch": 0.04235,
+      "grad_norm": 0.9300140410326806,
+      "learning_rate": 0.003,
+      "loss": 4.0712,
+      "step": 4235
+    },
+    {
+      "epoch": 0.04236,
+      "grad_norm": 1.0741902913167412,
+      "learning_rate": 0.003,
+      "loss": 4.0895,
+      "step": 4236
+    },
+    {
+      "epoch": 0.04237,
+      "grad_norm": 0.9580888994428801,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 4237
+    },
+    {
+      "epoch": 0.04238,
+      "grad_norm": 0.9482815531910007,
+      "learning_rate": 0.003,
+      "loss": 4.1176,
+      "step": 4238
+    },
+    {
+      "epoch": 0.04239,
+      "grad_norm": 0.9555806662772239,
+      "learning_rate": 0.003,
+      "loss": 4.1016,
+      "step": 4239
+    },
+    {
+      "epoch": 0.0424,
+      "grad_norm": 0.8664629875137623,
+      "learning_rate": 0.003,
+      "loss": 4.1005,
+      "step": 4240
+    },
+    {
+      "epoch": 0.04241,
+      "grad_norm": 0.9630766502725931,
+      "learning_rate": 0.003,
+      "loss": 4.1171,
+      "step": 4241
+    },
+    {
+      "epoch": 0.04242,
+      "grad_norm": 1.0804387798244068,
+      "learning_rate": 0.003,
+      "loss": 4.1085,
+      "step": 4242
+    },
+    {
+      "epoch": 0.04243,
+      "grad_norm": 0.9274350779718051,
+      "learning_rate": 0.003,
+      "loss": 4.0861,
+      "step": 4243
+    },
+    {
+      "epoch": 0.04244,
+      "grad_norm": 0.913816427911704,
+      "learning_rate": 0.003,
+      "loss": 4.0946,
+      "step": 4244
+    },
+    {
+      "epoch": 0.04245,
+      "grad_norm": 1.106413525319224,
+      "learning_rate": 0.003,
+      "loss": 4.0955,
+      "step": 4245
+    },
+    {
+      "epoch": 0.04246,
+      "grad_norm": 0.9030634078700811,
+      "learning_rate": 0.003,
+      "loss": 4.1158,
+      "step": 4246
+    },
+    {
+      "epoch": 0.04247,
+      "grad_norm": 0.8416832028937715,
+      "learning_rate": 0.003,
+      "loss": 4.0884,
+      "step": 4247
+    },
+    {
+      "epoch": 0.04248,
+      "grad_norm": 0.8169991638726913,
+      "learning_rate": 0.003,
+      "loss": 4.081,
+      "step": 4248
+    },
+    {
+      "epoch": 0.04249,
+      "grad_norm": 0.8044824810175584,
+      "learning_rate": 0.003,
+      "loss": 4.0797,
+      "step": 4249
+    },
+    {
+      "epoch": 0.0425,
+      "grad_norm": 0.8896696091741753,
+      "learning_rate": 0.003,
+      "loss": 4.0983,
+      "step": 4250
+    },
+    {
+      "epoch": 0.04251,
+      "grad_norm": 0.9502948772765745,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 4251
+    },
+    {
+      "epoch": 0.04252,
+      "grad_norm": 1.0326556881542461,
+      "learning_rate": 0.003,
+      "loss": 4.1092,
+      "step": 4252
+    },
+    {
+      "epoch": 0.04253,
+      "grad_norm": 1.0173408114295495,
+      "learning_rate": 0.003,
+      "loss": 4.0741,
+      "step": 4253
+    },
+    {
+      "epoch": 0.04254,
+      "grad_norm": 1.0646057446624475,
+      "learning_rate": 0.003,
+      "loss": 4.1019,
+      "step": 4254
+    },
+    {
+      "epoch": 0.04255,
+      "grad_norm": 0.8420645317602736,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 4255
+    },
+    {
+      "epoch": 0.04256,
+      "grad_norm": 0.7201557468187585,
+      "learning_rate": 0.003,
+      "loss": 4.078,
+      "step": 4256
+    },
+    {
+      "epoch": 0.04257,
+      "grad_norm": 0.7996495415748023,
+      "learning_rate": 0.003,
+      "loss": 4.1028,
+      "step": 4257
+    },
+    {
+      "epoch": 0.04258,
+      "grad_norm": 0.8858738615733014,
+      "learning_rate": 0.003,
+      "loss": 4.1038,
+      "step": 4258
+    },
+    {
+      "epoch": 0.04259,
+      "grad_norm": 0.9662802527822355,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 4259
+    },
+    {
+      "epoch": 0.0426,
+      "grad_norm": 0.9169898773347952,
+      "learning_rate": 0.003,
+      "loss": 4.0869,
+      "step": 4260
+    },
+    {
+      "epoch": 0.04261,
+      "grad_norm": 0.9208708917372306,
+      "learning_rate": 0.003,
+      "loss": 4.0993,
+      "step": 4261
+    },
+    {
+      "epoch": 0.04262,
+      "grad_norm": 0.8866710041215375,
+      "learning_rate": 0.003,
+      "loss": 4.1145,
+      "step": 4262
+    },
+    {
+      "epoch": 0.04263,
+      "grad_norm": 0.8426381040253105,
+      "learning_rate": 0.003,
+      "loss": 4.0973,
+      "step": 4263
+    },
+    {
+      "epoch": 0.04264,
+      "grad_norm": 0.743799590275064,
+      "learning_rate": 0.003,
+      "loss": 4.1055,
+      "step": 4264
+    },
+    {
+      "epoch": 0.04265,
+      "grad_norm": 0.8815249453115196,
+      "learning_rate": 0.003,
+      "loss": 4.1041,
+      "step": 4265
+    },
+    {
+      "epoch": 0.04266,
+      "grad_norm": 0.8452841678134956,
+      "learning_rate": 0.003,
+      "loss": 4.0856,
+      "step": 4266
+    },
+    {
+      "epoch": 0.04267,
+      "grad_norm": 0.7585523353787604,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 4267
+    },
+    {
+      "epoch": 0.04268,
+      "grad_norm": 0.7042766430430472,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 4268
+    },
+    {
+      "epoch": 0.04269,
+      "grad_norm": 0.6767603788338118,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 4269
+    },
+    {
+      "epoch": 0.0427,
+      "grad_norm": 0.6619260992554636,
+      "learning_rate": 0.003,
+      "loss": 4.0769,
+      "step": 4270
+    },
+    {
+      "epoch": 0.04271,
+      "grad_norm": 0.716784572802792,
+      "learning_rate": 0.003,
+      "loss": 4.0901,
+      "step": 4271
+    },
+    {
+      "epoch": 0.04272,
+      "grad_norm": 0.9018222570354946,
+      "learning_rate": 0.003,
+      "loss": 4.0805,
+      "step": 4272
+    },
+    {
+      "epoch": 0.04273,
+      "grad_norm": 1.0946540429372817,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 4273
+    },
+    {
+      "epoch": 0.04274,
+      "grad_norm": 0.8458329250448817,
+      "learning_rate": 0.003,
+      "loss": 4.086,
+      "step": 4274
+    },
+    {
+      "epoch": 0.04275,
+      "grad_norm": 0.6435172962120423,
+      "learning_rate": 0.003,
+      "loss": 4.0865,
+      "step": 4275
+    },
+    {
+      "epoch": 0.04276,
+      "grad_norm": 0.6643590249615142,
+      "learning_rate": 0.003,
+      "loss": 4.1023,
+      "step": 4276
+    },
+    {
+      "epoch": 0.04277,
+      "grad_norm": 0.79792656427568,
+      "learning_rate": 0.003,
+      "loss": 4.0928,
+      "step": 4277
+    },
+    {
+      "epoch": 0.04278,
+      "grad_norm": 0.740840553173491,
+      "learning_rate": 0.003,
+      "loss": 4.0939,
+      "step": 4278
+    },
+    {
+      "epoch": 0.04279,
+      "grad_norm": 0.7711154804154717,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 4279
+    },
+    {
+      "epoch": 0.0428,
+      "grad_norm": 0.7250008586500746,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 4280
+    },
+    {
+      "epoch": 0.04281,
+      "grad_norm": 0.7499387037166975,
+      "learning_rate": 0.003,
+      "loss": 4.1047,
+      "step": 4281
+    },
+    {
+      "epoch": 0.04282,
+      "grad_norm": 0.7545960253753359,
+      "learning_rate": 0.003,
+      "loss": 4.0814,
+      "step": 4282
+    },
+    {
+      "epoch": 0.04283,
+      "grad_norm": 0.839588483108746,
+      "learning_rate": 0.003,
+      "loss": 4.12,
+      "step": 4283
+    },
+    {
+      "epoch": 0.04284,
+      "grad_norm": 0.86181150410575,
+      "learning_rate": 0.003,
+      "loss": 4.074,
+      "step": 4284
+    },
+    {
+      "epoch": 0.04285,
+      "grad_norm": 0.8662575012952002,
+      "learning_rate": 0.003,
+      "loss": 4.09,
+      "step": 4285
+    },
+    {
+      "epoch": 0.04286,
+      "grad_norm": 1.0398949684975605,
+      "learning_rate": 0.003,
+      "loss": 4.1182,
+      "step": 4286
+    },
+    {
+      "epoch": 0.04287,
+      "grad_norm": 1.1167633903119278,
+      "learning_rate": 0.003,
+      "loss": 4.1002,
+      "step": 4287
+    },
+    {
+      "epoch": 0.04288,
+      "grad_norm": 0.9953157675421085,
+      "learning_rate": 0.003,
+      "loss": 4.0784,
+      "step": 4288
+    },
+    {
+      "epoch": 0.04289,
+      "grad_norm": 0.9963288991872585,
+      "learning_rate": 0.003,
+      "loss": 4.087,
+      "step": 4289
+    },
+    {
+      "epoch": 0.0429,
+      "grad_norm": 1.0134088286440928,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 4290
+    },
+    {
+      "epoch": 0.04291,
+      "grad_norm": 1.0632191551515977,
+      "learning_rate": 0.003,
+      "loss": 4.08,
+      "step": 4291
+    },
+    {
+      "epoch": 0.04292,
+      "grad_norm": 1.1744325304960588,
+      "learning_rate": 0.003,
+      "loss": 4.0864,
+      "step": 4292
+    },
+    {
+      "epoch": 0.04293,
+      "grad_norm": 0.8830847280464275,
+      "learning_rate": 0.003,
+      "loss": 4.1184,
+      "step": 4293
+    },
+    {
+      "epoch": 0.04294,
+      "grad_norm": 0.8997282546954833,
+      "learning_rate": 0.003,
+      "loss": 4.0968,
+      "step": 4294
+    },
+    {
+      "epoch": 0.04295,
+      "grad_norm": 1.0130781104561735,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 4295
+    },
+    {
+      "epoch": 0.04296,
+      "grad_norm": 0.7688499577075354,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 4296
+    },
+    {
+      "epoch": 0.04297,
+      "grad_norm": 0.8069754114362383,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 4297
+    },
+    {
+      "epoch": 0.04298,
+      "grad_norm": 0.7696772164144003,
+      "learning_rate": 0.003,
+      "loss": 4.1021,
+      "step": 4298
+    },
+    {
+      "epoch": 0.04299,
+      "grad_norm": 0.731529824570407,
+      "learning_rate": 0.003,
+      "loss": 4.0886,
+      "step": 4299
+    },
+    {
+      "epoch": 0.043,
+      "grad_norm": 0.7813911597495976,
+      "learning_rate": 0.003,
+      "loss": 4.0702,
+      "step": 4300
+    },
+    {
+      "epoch": 0.04301,
+      "grad_norm": 0.9515070484621972,
+      "learning_rate": 0.003,
+      "loss": 4.1246,
+      "step": 4301
+    },
+    {
+      "epoch": 0.04302,
+      "grad_norm": 1.1524985858220247,
+      "learning_rate": 0.003,
+      "loss": 4.1198,
+      "step": 4302
+    },
+    {
+      "epoch": 0.04303,
+      "grad_norm": 0.9661803127063716,
+      "learning_rate": 0.003,
+      "loss": 4.1038,
+      "step": 4303
+    },
+    {
+      "epoch": 0.04304,
+      "grad_norm": 0.8958980804239224,
+      "learning_rate": 0.003,
+      "loss": 4.1004,
+      "step": 4304
+    },
+    {
+      "epoch": 0.04305,
+      "grad_norm": 0.9457226757083944,
+      "learning_rate": 0.003,
+      "loss": 4.0986,
+      "step": 4305
+    },
+    {
+      "epoch": 0.04306,
+      "grad_norm": 0.866320067732626,
+      "learning_rate": 0.003,
+      "loss": 4.0701,
+      "step": 4306
+    },
+    {
+      "epoch": 0.04307,
+      "grad_norm": 0.9815566634548711,
+      "learning_rate": 0.003,
+      "loss": 4.0798,
+      "step": 4307
+    },
+    {
+      "epoch": 0.04308,
+      "grad_norm": 1.1045714176495842,
+      "learning_rate": 0.003,
+      "loss": 4.1016,
+      "step": 4308
+    },
+    {
+      "epoch": 0.04309,
+      "grad_norm": 0.9997081776371058,
+      "learning_rate": 0.003,
+      "loss": 4.0875,
+      "step": 4309
+    },
+    {
+      "epoch": 0.0431,
+      "grad_norm": 1.014433219426494,
+      "learning_rate": 0.003,
+      "loss": 4.1076,
+      "step": 4310
+    },
+    {
+      "epoch": 0.04311,
+      "grad_norm": 1.0841431057954531,
+      "learning_rate": 0.003,
+      "loss": 4.1103,
+      "step": 4311
+    },
+    {
+      "epoch": 0.04312,
+      "grad_norm": 0.8706786032806877,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 4312
+    },
+    {
+      "epoch": 0.04313,
+      "grad_norm": 1.0443926449585144,
+      "learning_rate": 0.003,
+      "loss": 4.1006,
+      "step": 4313
+    },
+    {
+      "epoch": 0.04314,
+      "grad_norm": 1.0606632584516302,
+      "learning_rate": 0.003,
+      "loss": 4.0894,
+      "step": 4314
+    },
+    {
+      "epoch": 0.04315,
+      "grad_norm": 0.8710054154104404,
+      "learning_rate": 0.003,
+      "loss": 4.1084,
+      "step": 4315
+    },
+    {
+      "epoch": 0.04316,
+      "grad_norm": 0.7770801944004984,
+      "learning_rate": 0.003,
+      "loss": 4.101,
+      "step": 4316
+    },
+    {
+      "epoch": 0.04317,
+      "grad_norm": 0.808693446756647,
+      "learning_rate": 0.003,
+      "loss": 4.1163,
+      "step": 4317
+    },
+    {
+      "epoch": 0.04318,
+      "grad_norm": 0.783023904953245,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 4318
+    },
+    {
+      "epoch": 0.04319,
+      "grad_norm": 0.7045592780420906,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 4319
+    },
+    {
+      "epoch": 0.0432,
+      "grad_norm": 0.6211212627554941,
+      "learning_rate": 0.003,
+      "loss": 4.0712,
+      "step": 4320
+    },
+    {
+      "epoch": 0.04321,
+      "grad_norm": 0.7218417386943828,
+      "learning_rate": 0.003,
+      "loss": 4.0759,
+      "step": 4321
+    },
+    {
+      "epoch": 0.04322,
+      "grad_norm": 0.7864622459064595,
+      "learning_rate": 0.003,
+      "loss": 4.1099,
+      "step": 4322
+    },
+    {
+      "epoch": 0.04323,
+      "grad_norm": 1.0759947667136016,
+      "learning_rate": 0.003,
+      "loss": 4.0829,
+      "step": 4323
+    },
+    {
+      "epoch": 0.04324,
+      "grad_norm": 1.3618558609167744,
+      "learning_rate": 0.003,
+      "loss": 4.0891,
+      "step": 4324
+    },
+    {
+      "epoch": 0.04325,
+      "grad_norm": 0.8537661961267661,
+      "learning_rate": 0.003,
+      "loss": 4.108,
+      "step": 4325
+    },
+    {
+      "epoch": 0.04326,
+      "grad_norm": 0.8673429528992446,
+      "learning_rate": 0.003,
+      "loss": 4.0867,
+      "step": 4326
+    },
+    {
+      "epoch": 0.04327,
+      "grad_norm": 0.8784142198291053,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 4327
+    },
+    {
+      "epoch": 0.04328,
+      "grad_norm": 0.964766867252245,
+      "learning_rate": 0.003,
+      "loss": 4.088,
+      "step": 4328
+    },
+    {
+      "epoch": 0.04329,
+      "grad_norm": 1.159486755106838,
+      "learning_rate": 0.003,
+      "loss": 4.1146,
+      "step": 4329
+    },
+    {
+      "epoch": 0.0433,
+      "grad_norm": 0.9798503929358533,
+      "learning_rate": 0.003,
+      "loss": 4.0823,
+      "step": 4330
+    },
+    {
+      "epoch": 0.04331,
+      "grad_norm": 0.8036875374792932,
+      "learning_rate": 0.003,
+      "loss": 4.0724,
+      "step": 4331
+    },
+    {
+      "epoch": 0.04332,
+      "grad_norm": 0.8974020360029874,
+      "learning_rate": 0.003,
+      "loss": 4.1218,
+      "step": 4332
+    },
+    {
+      "epoch": 0.04333,
+      "grad_norm": 0.952759627899482,
+      "learning_rate": 0.003,
+      "loss": 4.0939,
+      "step": 4333
+    },
+    {
+      "epoch": 0.04334,
+      "grad_norm": 0.9486168347147088,
+      "learning_rate": 0.003,
+      "loss": 4.0848,
+      "step": 4334
+    },
+    {
+      "epoch": 0.04335,
+      "grad_norm": 0.9411584678122592,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 4335
+    },
+    {
+      "epoch": 0.04336,
+      "grad_norm": 0.8380130707060495,
+      "learning_rate": 0.003,
+      "loss": 4.1043,
+      "step": 4336
+    },
+    {
+      "epoch": 0.04337,
+      "grad_norm": 0.8435104385458285,
+      "learning_rate": 0.003,
+      "loss": 4.0897,
+      "step": 4337
+    },
+    {
+      "epoch": 0.04338,
+      "grad_norm": 0.8283613873296221,
+      "learning_rate": 0.003,
+      "loss": 4.0901,
+      "step": 4338
+    },
+    {
+      "epoch": 0.04339,
+      "grad_norm": 0.7575807206093103,
+      "learning_rate": 0.003,
+      "loss": 4.0957,
+      "step": 4339
+    },
+    {
+      "epoch": 0.0434,
+      "grad_norm": 0.7052060232541129,
+      "learning_rate": 0.003,
+      "loss": 4.0745,
+      "step": 4340
+    },
+    {
+      "epoch": 0.04341,
+      "grad_norm": 0.7393447733605004,
+      "learning_rate": 0.003,
+      "loss": 4.0863,
+      "step": 4341
+    },
+    {
+      "epoch": 0.04342,
+      "grad_norm": 0.6868972479892425,
+      "learning_rate": 0.003,
+      "loss": 4.0719,
+      "step": 4342
+    },
+    {
+      "epoch": 0.04343,
+      "grad_norm": 0.642426794922686,
+      "learning_rate": 0.003,
+      "loss": 4.1075,
+      "step": 4343
+    },
+    {
+      "epoch": 0.04344,
+      "grad_norm": 0.6793615281465072,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 4344
+    },
+    {
+      "epoch": 0.04345,
+      "grad_norm": 0.6547984850581937,
+      "learning_rate": 0.003,
+      "loss": 4.0924,
+      "step": 4345
+    },
+    {
+      "epoch": 0.04346,
+      "grad_norm": 0.7129305907572272,
+      "learning_rate": 0.003,
+      "loss": 4.0874,
+      "step": 4346
+    },
+    {
+      "epoch": 0.04347,
+      "grad_norm": 0.7392919981263668,
+      "learning_rate": 0.003,
+      "loss": 4.0892,
+      "step": 4347
+    },
+    {
+      "epoch": 0.04348,
+      "grad_norm": 0.7595005587543163,
+      "learning_rate": 0.003,
+      "loss": 4.0718,
+      "step": 4348
+    },
+    {
+      "epoch": 0.04349,
+      "grad_norm": 0.8185974571279835,
+      "learning_rate": 0.003,
+      "loss": 4.087,
+      "step": 4349
+    },
+    {
+      "epoch": 0.0435,
+      "grad_norm": 0.9514565408114718,
+      "learning_rate": 0.003,
+      "loss": 4.0754,
+      "step": 4350
+    },
+    {
+      "epoch": 0.04351,
+      "grad_norm": 1.2109347120622453,
+      "learning_rate": 0.003,
+      "loss": 4.0959,
+      "step": 4351
+    },
+    {
+      "epoch": 0.04352,
+      "grad_norm": 0.9471998868314153,
+      "learning_rate": 0.003,
+      "loss": 4.1051,
+      "step": 4352
+    },
+    {
+      "epoch": 0.04353,
+      "grad_norm": 0.9889455977002414,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 4353
+    },
+    {
+      "epoch": 0.04354,
+      "grad_norm": 1.0731295960755523,
+      "learning_rate": 0.003,
+      "loss": 4.088,
+      "step": 4354
+    },
+    {
+      "epoch": 0.04355,
+      "grad_norm": 0.9572032046556027,
+      "learning_rate": 0.003,
+      "loss": 4.0916,
+      "step": 4355
+    },
+    {
+      "epoch": 0.04356,
+      "grad_norm": 0.9581656158071228,
+      "learning_rate": 0.003,
+      "loss": 4.0979,
+      "step": 4356
+    },
+    {
+      "epoch": 0.04357,
+      "grad_norm": 0.9195125428358254,
+      "learning_rate": 0.003,
+      "loss": 4.0862,
+      "step": 4357
+    },
+    {
+      "epoch": 0.04358,
+      "grad_norm": 0.9636453763618577,
+      "learning_rate": 0.003,
+      "loss": 4.0757,
+      "step": 4358
+    },
+    {
+      "epoch": 0.04359,
+      "grad_norm": 1.0660232103277558,
+      "learning_rate": 0.003,
+      "loss": 4.1052,
+      "step": 4359
+    },
+    {
+      "epoch": 0.0436,
+      "grad_norm": 0.8061306785792844,
+      "learning_rate": 0.003,
+      "loss": 4.0996,
+      "step": 4360
+    },
+    {
+      "epoch": 0.04361,
+      "grad_norm": 0.8093145893752948,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 4361
+    },
+    {
+      "epoch": 0.04362,
+      "grad_norm": 0.8171945284904929,
+      "learning_rate": 0.003,
+      "loss": 4.0733,
+      "step": 4362
+    },
+    {
+      "epoch": 0.04363,
+      "grad_norm": 0.9868190592217813,
+      "learning_rate": 0.003,
+      "loss": 4.0814,
+      "step": 4363
+    },
+    {
+      "epoch": 0.04364,
+      "grad_norm": 1.0426300462439566,
+      "learning_rate": 0.003,
+      "loss": 4.0994,
+      "step": 4364
+    },
+    {
+      "epoch": 0.04365,
+      "grad_norm": 0.8588093044292321,
+      "learning_rate": 0.003,
+      "loss": 4.0957,
+      "step": 4365
+    },
+    {
+      "epoch": 0.04366,
+      "grad_norm": 0.8909026739864028,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 4366
+    },
+    {
+      "epoch": 0.04367,
+      "grad_norm": 0.873925255651534,
+      "learning_rate": 0.003,
+      "loss": 4.1008,
+      "step": 4367
+    },
+    {
+      "epoch": 0.04368,
+      "grad_norm": 0.779588637997916,
+      "learning_rate": 0.003,
+      "loss": 4.0846,
+      "step": 4368
+    },
+    {
+      "epoch": 0.04369,
+      "grad_norm": 0.7966324124265414,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 4369
+    },
+    {
+      "epoch": 0.0437,
+      "grad_norm": 0.669780598735826,
+      "learning_rate": 0.003,
+      "loss": 4.1015,
+      "step": 4370
+    },
+    {
+      "epoch": 0.04371,
+      "grad_norm": 0.7760483978802495,
+      "learning_rate": 0.003,
+      "loss": 4.0884,
+      "step": 4371
+    },
+    {
+      "epoch": 0.04372,
+      "grad_norm": 0.9716643560234968,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 4372
+    },
+    {
+      "epoch": 0.04373,
+      "grad_norm": 1.2693018325935452,
+      "learning_rate": 0.003,
+      "loss": 4.1111,
+      "step": 4373
+    },
+    {
+      "epoch": 0.04374,
+      "grad_norm": 1.0224726158531943,
+      "learning_rate": 0.003,
+      "loss": 4.1489,
+      "step": 4374
+    },
+    {
+      "epoch": 0.04375,
+      "grad_norm": 0.8263535068880948,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 4375
+    },
+    {
+      "epoch": 0.04376,
+      "grad_norm": 0.7522103171040029,
+      "learning_rate": 0.003,
+      "loss": 4.0945,
+      "step": 4376
+    },
+    {
+      "epoch": 0.04377,
+      "grad_norm": 0.8208201264915056,
+      "learning_rate": 0.003,
+      "loss": 4.1211,
+      "step": 4377
+    },
+    {
+      "epoch": 0.04378,
+      "grad_norm": 1.005702386332168,
+      "learning_rate": 0.003,
+      "loss": 4.109,
+      "step": 4378
+    },
+    {
+      "epoch": 0.04379,
+      "grad_norm": 1.1786773838771147,
+      "learning_rate": 0.003,
+      "loss": 4.1324,
+      "step": 4379
+    },
+    {
+      "epoch": 0.0438,
+      "grad_norm": 0.7871810906501763,
+      "learning_rate": 0.003,
+      "loss": 4.092,
+      "step": 4380
+    },
+    {
+      "epoch": 0.04381,
+      "grad_norm": 0.7577866085120181,
+      "learning_rate": 0.003,
+      "loss": 4.0802,
+      "step": 4381
+    },
+    {
+      "epoch": 0.04382,
+      "grad_norm": 0.830788547447954,
+      "learning_rate": 0.003,
+      "loss": 4.096,
+      "step": 4382
+    },
+    {
+      "epoch": 0.04383,
+      "grad_norm": 0.9439340501432005,
+      "learning_rate": 0.003,
+      "loss": 4.076,
+      "step": 4383
+    },
+    {
+      "epoch": 0.04384,
+      "grad_norm": 0.9569811292204884,
+      "learning_rate": 0.003,
+      "loss": 4.0812,
+      "step": 4384
+    },
+    {
+      "epoch": 0.04385,
+      "grad_norm": 0.9621273589739346,
+      "learning_rate": 0.003,
+      "loss": 4.1186,
+      "step": 4385
+    },
+    {
+      "epoch": 0.04386,
+      "grad_norm": 1.0027765639035047,
+      "learning_rate": 0.003,
+      "loss": 4.0978,
+      "step": 4386
+    },
+    {
+      "epoch": 0.04387,
+      "grad_norm": 0.8877446565419372,
+      "learning_rate": 0.003,
+      "loss": 4.1159,
+      "step": 4387
+    },
+    {
+      "epoch": 0.04388,
+      "grad_norm": 1.004365074328248,
+      "learning_rate": 0.003,
+      "loss": 4.0785,
+      "step": 4388
+    },
+    {
+      "epoch": 0.04389,
+      "grad_norm": 1.167012365586639,
+      "learning_rate": 0.003,
+      "loss": 4.1193,
+      "step": 4389
+    },
+    {
+      "epoch": 0.0439,
+      "grad_norm": 1.126759575747849,
+      "learning_rate": 0.003,
+      "loss": 4.1115,
+      "step": 4390
+    },
+    {
+      "epoch": 0.04391,
+      "grad_norm": 0.8766190371623821,
+      "learning_rate": 0.003,
+      "loss": 4.0806,
+      "step": 4391
+    },
+    {
+      "epoch": 0.04392,
+      "grad_norm": 0.9422353250001032,
+      "learning_rate": 0.003,
+      "loss": 4.0901,
+      "step": 4392
+    },
+    {
+      "epoch": 0.04393,
+      "grad_norm": 0.9388161319056185,
+      "learning_rate": 0.003,
+      "loss": 4.0779,
+      "step": 4393
+    },
+    {
+      "epoch": 0.04394,
+      "grad_norm": 0.9030510084400544,
+      "learning_rate": 0.003,
+      "loss": 4.0976,
+      "step": 4394
+    },
+    {
+      "epoch": 0.04395,
+      "grad_norm": 0.8932781141162982,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 4395
+    },
+    {
+      "epoch": 0.04396,
+      "grad_norm": 0.8796817611377403,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 4396
+    },
+    {
+      "epoch": 0.04397,
+      "grad_norm": 0.844265233977852,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 4397
+    },
+    {
+      "epoch": 0.04398,
+      "grad_norm": 0.9152454694688225,
+      "learning_rate": 0.003,
+      "loss": 4.0846,
+      "step": 4398
+    },
+    {
+      "epoch": 0.04399,
+      "grad_norm": 0.9294951448787108,
+      "learning_rate": 0.003,
+      "loss": 4.0919,
+      "step": 4399
+    },
+    {
+      "epoch": 0.044,
+      "grad_norm": 1.0106829137077165,
+      "learning_rate": 0.003,
+      "loss": 4.0903,
+      "step": 4400
+    },
+    {
+      "epoch": 0.04401,
+      "grad_norm": 0.9222214369606679,
+      "learning_rate": 0.003,
+      "loss": 4.1049,
+      "step": 4401
+    },
+    {
+      "epoch": 0.04402,
+      "grad_norm": 0.8861574452686165,
+      "learning_rate": 0.003,
+      "loss": 4.1072,
+      "step": 4402
+    },
+    {
+      "epoch": 0.04403,
+      "grad_norm": 0.9244720061111934,
+      "learning_rate": 0.003,
+      "loss": 4.0741,
+      "step": 4403
+    },
+    {
+      "epoch": 0.04404,
+      "grad_norm": 1.0163229703427172,
+      "learning_rate": 0.003,
+      "loss": 4.1046,
+      "step": 4404
+    },
+    {
+      "epoch": 0.04405,
+      "grad_norm": 1.112663606677766,
+      "learning_rate": 0.003,
+      "loss": 4.103,
+      "step": 4405
+    },
+    {
+      "epoch": 0.04406,
+      "grad_norm": 0.7836665936004462,
+      "learning_rate": 0.003,
+      "loss": 4.0944,
+      "step": 4406
+    },
+    {
+      "epoch": 0.04407,
+      "grad_norm": 0.6834208693181686,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 4407
+    },
+    {
+      "epoch": 0.04408,
+      "grad_norm": 0.7215319828238491,
+      "learning_rate": 0.003,
+      "loss": 4.0692,
+      "step": 4408
+    },
+    {
+      "epoch": 0.04409,
+      "grad_norm": 0.7547734730978946,
+      "learning_rate": 0.003,
+      "loss": 4.0991,
+      "step": 4409
+    },
+    {
+      "epoch": 0.0441,
+      "grad_norm": 0.9855505388299907,
+      "learning_rate": 0.003,
+      "loss": 4.0924,
+      "step": 4410
+    },
+    {
+      "epoch": 0.04411,
+      "grad_norm": 1.3745879066451838,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 4411
+    },
+    {
+      "epoch": 0.04412,
+      "grad_norm": 0.6697260575539041,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 4412
+    },
+    {
+      "epoch": 0.04413,
+      "grad_norm": 0.7289649196117617,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 4413
+    },
+    {
+      "epoch": 0.04414,
+      "grad_norm": 0.8710542961593792,
+      "learning_rate": 0.003,
+      "loss": 4.121,
+      "step": 4414
+    },
+    {
+      "epoch": 0.04415,
+      "grad_norm": 1.006707768649903,
+      "learning_rate": 0.003,
+      "loss": 4.089,
+      "step": 4415
+    },
+    {
+      "epoch": 0.04416,
+      "grad_norm": 0.9528175763218202,
+      "learning_rate": 0.003,
+      "loss": 4.0762,
+      "step": 4416
+    },
+    {
+      "epoch": 0.04417,
+      "grad_norm": 0.8424334779705969,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 4417
+    },
+    {
+      "epoch": 0.04418,
+      "grad_norm": 0.97351224925848,
+      "learning_rate": 0.003,
+      "loss": 4.0966,
+      "step": 4418
+    },
+    {
+      "epoch": 0.04419,
+      "grad_norm": 1.0808697355237324,
+      "learning_rate": 0.003,
+      "loss": 4.1194,
+      "step": 4419
+    },
+    {
+      "epoch": 0.0442,
+      "grad_norm": 1.167054123021739,
+      "learning_rate": 0.003,
+      "loss": 4.0947,
+      "step": 4420
+    },
+    {
+      "epoch": 0.04421,
+      "grad_norm": 0.8176085170190497,
+      "learning_rate": 0.003,
+      "loss": 4.1169,
+      "step": 4421
+    },
+    {
+      "epoch": 0.04422,
+      "grad_norm": 0.7560953731068145,
+      "learning_rate": 0.003,
+      "loss": 4.0803,
+      "step": 4422
+    },
+    {
+      "epoch": 0.04423,
+      "grad_norm": 0.8430900886347851,
+      "learning_rate": 0.003,
+      "loss": 4.0997,
+      "step": 4423
+    },
+    {
+      "epoch": 0.04424,
+      "grad_norm": 0.7698126783686144,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 4424
+    },
+    {
+      "epoch": 0.04425,
+      "grad_norm": 0.7635205731836768,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 4425
+    },
+    {
+      "epoch": 0.04426,
+      "grad_norm": 0.7393576832666733,
+      "learning_rate": 0.003,
+      "loss": 4.0931,
+      "step": 4426
+    },
+    {
+      "epoch": 0.04427,
+      "grad_norm": 0.7066164659139067,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 4427
+    },
+    {
+      "epoch": 0.04428,
+      "grad_norm": 0.584092377564762,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 4428
+    },
+    {
+      "epoch": 0.04429,
+      "grad_norm": 0.5955745561562374,
+      "learning_rate": 0.003,
+      "loss": 4.1113,
+      "step": 4429
+    },
+    {
+      "epoch": 0.0443,
+      "grad_norm": 0.7642349609553867,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 4430
+    },
+    {
+      "epoch": 0.04431,
+      "grad_norm": 0.9897440622578424,
+      "learning_rate": 0.003,
+      "loss": 4.0996,
+      "step": 4431
+    },
+    {
+      "epoch": 0.04432,
+      "grad_norm": 1.3796673035154565,
+      "learning_rate": 0.003,
+      "loss": 4.1162,
+      "step": 4432
+    },
+    {
+      "epoch": 0.04433,
+      "grad_norm": 0.7338783584401088,
+      "learning_rate": 0.003,
+      "loss": 4.079,
+      "step": 4433
+    },
+    {
+      "epoch": 0.04434,
+      "grad_norm": 0.9478779466631931,
+      "learning_rate": 0.003,
+      "loss": 4.0833,
+      "step": 4434
+    },
+    {
+      "epoch": 0.04435,
+      "grad_norm": 1.0373098684540567,
+      "learning_rate": 0.003,
+      "loss": 4.1009,
+      "step": 4435
+    },
+    {
+      "epoch": 0.04436,
+      "grad_norm": 0.9002494417955366,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 4436
+    },
+    {
+      "epoch": 0.04437,
+      "grad_norm": 0.9423712815921459,
+      "learning_rate": 0.003,
+      "loss": 4.0946,
+      "step": 4437
+    },
+    {
+      "epoch": 0.04438,
+      "grad_norm": 0.9881181207836567,
+      "learning_rate": 0.003,
+      "loss": 4.1344,
+      "step": 4438
+    },
+    {
+      "epoch": 0.04439,
+      "grad_norm": 1.140055930440485,
+      "learning_rate": 0.003,
+      "loss": 4.1062,
+      "step": 4439
+    },
+    {
+      "epoch": 0.0444,
+      "grad_norm": 0.8857720718409576,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 4440
+    },
+    {
+      "epoch": 0.04441,
+      "grad_norm": 0.9085661359962524,
+      "learning_rate": 0.003,
+      "loss": 4.0916,
+      "step": 4441
+    },
+    {
+      "epoch": 0.04442,
+      "grad_norm": 0.8204868786177558,
+      "learning_rate": 0.003,
+      "loss": 4.0858,
+      "step": 4442
+    },
+    {
+      "epoch": 0.04443,
+      "grad_norm": 0.6457268453463154,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 4443
+    },
+    {
+      "epoch": 0.04444,
+      "grad_norm": 0.6492396831004539,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 4444
+    },
+    {
+      "epoch": 0.04445,
+      "grad_norm": 0.6928510619497573,
+      "learning_rate": 0.003,
+      "loss": 4.0868,
+      "step": 4445
+    },
+    {
+      "epoch": 0.04446,
+      "grad_norm": 0.851264848376963,
+      "learning_rate": 0.003,
+      "loss": 4.0572,
+      "step": 4446
+    },
+    {
+      "epoch": 0.04447,
+      "grad_norm": 0.9403414167914219,
+      "learning_rate": 0.003,
+      "loss": 4.0663,
+      "step": 4447
+    },
+    {
+      "epoch": 0.04448,
+      "grad_norm": 1.0475208155252718,
+      "learning_rate": 0.003,
+      "loss": 4.0656,
+      "step": 4448
+    },
+    {
+      "epoch": 0.04449,
+      "grad_norm": 0.8839010679595062,
+      "learning_rate": 0.003,
+      "loss": 4.0779,
+      "step": 4449
+    },
+    {
+      "epoch": 0.0445,
+      "grad_norm": 0.9663307778578972,
+      "learning_rate": 0.003,
+      "loss": 4.0855,
+      "step": 4450
+    },
+    {
+      "epoch": 0.04451,
+      "grad_norm": 1.0371788520078717,
+      "learning_rate": 0.003,
+      "loss": 4.0875,
+      "step": 4451
+    },
+    {
+      "epoch": 0.04452,
+      "grad_norm": 1.2078981755286253,
+      "learning_rate": 0.003,
+      "loss": 4.1113,
+      "step": 4452
+    },
+    {
+      "epoch": 0.04453,
+      "grad_norm": 0.8296222855961709,
+      "learning_rate": 0.003,
+      "loss": 4.1097,
+      "step": 4453
+    },
+    {
+      "epoch": 0.04454,
+      "grad_norm": 0.8721163575738796,
+      "learning_rate": 0.003,
+      "loss": 4.0902,
+      "step": 4454
+    },
+    {
+      "epoch": 0.04455,
+      "grad_norm": 0.9017669849588411,
+      "learning_rate": 0.003,
+      "loss": 4.1017,
+      "step": 4455
+    },
+    {
+      "epoch": 0.04456,
+      "grad_norm": 1.0543326711835521,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 4456
+    },
+    {
+      "epoch": 0.04457,
+      "grad_norm": 1.1758149593074942,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 4457
+    },
+    {
+      "epoch": 0.04458,
+      "grad_norm": 0.795675945576436,
+      "learning_rate": 0.003,
+      "loss": 4.1065,
+      "step": 4458
+    },
+    {
+      "epoch": 0.04459,
+      "grad_norm": 0.8944676165743426,
+      "learning_rate": 0.003,
+      "loss": 4.1156,
+      "step": 4459
+    },
+    {
+      "epoch": 0.0446,
+      "grad_norm": 1.0309911114547456,
+      "learning_rate": 0.003,
+      "loss": 4.1151,
+      "step": 4460
+    },
+    {
+      "epoch": 0.04461,
+      "grad_norm": 1.1633164535503087,
+      "learning_rate": 0.003,
+      "loss": 4.1207,
+      "step": 4461
+    },
+    {
+      "epoch": 0.04462,
+      "grad_norm": 1.167208135838096,
+      "learning_rate": 0.003,
+      "loss": 4.0744,
+      "step": 4462
+    },
+    {
+      "epoch": 0.04463,
+      "grad_norm": 1.183108803762208,
+      "learning_rate": 0.003,
+      "loss": 4.0919,
+      "step": 4463
+    },
+    {
+      "epoch": 0.04464,
+      "grad_norm": 0.7854170437651848,
+      "learning_rate": 0.003,
+      "loss": 4.0985,
+      "step": 4464
+    },
+    {
+      "epoch": 0.04465,
+      "grad_norm": 0.8018127380897019,
+      "learning_rate": 0.003,
+      "loss": 4.0828,
+      "step": 4465
+    },
+    {
+      "epoch": 0.04466,
+      "grad_norm": 0.7724293795857595,
+      "learning_rate": 0.003,
+      "loss": 4.0933,
+      "step": 4466
+    },
+    {
+      "epoch": 0.04467,
+      "grad_norm": 0.8927290074378922,
+      "learning_rate": 0.003,
+      "loss": 4.089,
+      "step": 4467
+    },
+    {
+      "epoch": 0.04468,
+      "grad_norm": 0.9529441187417813,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 4468
+    },
+    {
+      "epoch": 0.04469,
+      "grad_norm": 0.7635512052136587,
+      "learning_rate": 0.003,
+      "loss": 4.0938,
+      "step": 4469
+    },
+    {
+      "epoch": 0.0447,
+      "grad_norm": 0.7111288881650668,
+      "learning_rate": 0.003,
+      "loss": 4.0964,
+      "step": 4470
+    },
+    {
+      "epoch": 0.04471,
+      "grad_norm": 0.6842975948143225,
+      "learning_rate": 0.003,
+      "loss": 4.1143,
+      "step": 4471
+    },
+    {
+      "epoch": 0.04472,
+      "grad_norm": 0.6721731379887363,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 4472
+    },
+    {
+      "epoch": 0.04473,
+      "grad_norm": 0.8269116687874477,
+      "learning_rate": 0.003,
+      "loss": 4.069,
+      "step": 4473
+    },
+    {
+      "epoch": 0.04474,
+      "grad_norm": 0.9372410627748204,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 4474
+    },
+    {
+      "epoch": 0.04475,
+      "grad_norm": 1.1298342557384584,
+      "learning_rate": 0.003,
+      "loss": 4.0664,
+      "step": 4475
+    },
+    {
+      "epoch": 0.04476,
+      "grad_norm": 0.8988046555647741,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 4476
+    },
+    {
+      "epoch": 0.04477,
+      "grad_norm": 0.795000264212953,
+      "learning_rate": 0.003,
+      "loss": 4.0865,
+      "step": 4477
+    },
+    {
+      "epoch": 0.04478,
+      "grad_norm": 0.8406418984097118,
+      "learning_rate": 0.003,
+      "loss": 4.0847,
+      "step": 4478
+    },
+    {
+      "epoch": 0.04479,
+      "grad_norm": 0.9513259886842036,
+      "learning_rate": 0.003,
+      "loss": 4.067,
+      "step": 4479
+    },
+    {
+      "epoch": 0.0448,
+      "grad_norm": 1.0486642112843845,
+      "learning_rate": 0.003,
+      "loss": 4.0806,
+      "step": 4480
+    },
+    {
+      "epoch": 0.04481,
+      "grad_norm": 1.0910923440009397,
+      "learning_rate": 0.003,
+      "loss": 4.0771,
+      "step": 4481
+    },
+    {
+      "epoch": 0.04482,
+      "grad_norm": 0.9923003720594974,
+      "learning_rate": 0.003,
+      "loss": 4.102,
+      "step": 4482
+    },
+    {
+      "epoch": 0.04483,
+      "grad_norm": 0.9838181891048627,
+      "learning_rate": 0.003,
+      "loss": 4.0915,
+      "step": 4483
+    },
+    {
+      "epoch": 0.04484,
+      "grad_norm": 0.8441164141274783,
+      "learning_rate": 0.003,
+      "loss": 4.0995,
+      "step": 4484
+    },
+    {
+      "epoch": 0.04485,
+      "grad_norm": 0.8006899110895409,
+      "learning_rate": 0.003,
+      "loss": 4.1063,
+      "step": 4485
+    },
+    {
+      "epoch": 0.04486,
+      "grad_norm": 0.8707281199253866,
+      "learning_rate": 0.003,
+      "loss": 4.0701,
+      "step": 4486
+    },
+    {
+      "epoch": 0.04487,
+      "grad_norm": 0.8680647327916373,
+      "learning_rate": 0.003,
+      "loss": 4.0813,
+      "step": 4487
+    },
+    {
+      "epoch": 0.04488,
+      "grad_norm": 0.8736811662681273,
+      "learning_rate": 0.003,
+      "loss": 4.1028,
+      "step": 4488
+    },
+    {
+      "epoch": 0.04489,
+      "grad_norm": 0.8803676696260273,
+      "learning_rate": 0.003,
+      "loss": 4.1047,
+      "step": 4489
+    },
+    {
+      "epoch": 0.0449,
+      "grad_norm": 0.9871186527446516,
+      "learning_rate": 0.003,
+      "loss": 4.1047,
+      "step": 4490
+    },
+    {
+      "epoch": 0.04491,
+      "grad_norm": 1.0838956165086664,
+      "learning_rate": 0.003,
+      "loss": 4.0947,
+      "step": 4491
+    },
+    {
+      "epoch": 0.04492,
+      "grad_norm": 1.0246550076014433,
+      "learning_rate": 0.003,
+      "loss": 4.1021,
+      "step": 4492
+    },
+    {
+      "epoch": 0.04493,
+      "grad_norm": 1.119573546351604,
+      "learning_rate": 0.003,
+      "loss": 4.1161,
+      "step": 4493
+    },
+    {
+      "epoch": 0.04494,
+      "grad_norm": 0.9017080906502098,
+      "learning_rate": 0.003,
+      "loss": 4.0891,
+      "step": 4494
+    },
+    {
+      "epoch": 0.04495,
+      "grad_norm": 0.8048737469068201,
+      "learning_rate": 0.003,
+      "loss": 4.0853,
+      "step": 4495
+    },
+    {
+      "epoch": 0.04496,
+      "grad_norm": 0.8207659865646967,
+      "learning_rate": 0.003,
+      "loss": 4.1246,
+      "step": 4496
+    },
+    {
+      "epoch": 0.04497,
+      "grad_norm": 1.1245269296552654,
+      "learning_rate": 0.003,
+      "loss": 4.1121,
+      "step": 4497
+    },
+    {
+      "epoch": 0.04498,
+      "grad_norm": 1.0002119651690387,
+      "learning_rate": 0.003,
+      "loss": 4.0823,
+      "step": 4498
+    },
+    {
+      "epoch": 0.04499,
+      "grad_norm": 0.9808448366317947,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 4499
+    },
+    {
+      "epoch": 0.045,
+      "grad_norm": 1.132093441653165,
+      "learning_rate": 0.003,
+      "loss": 4.0864,
+      "step": 4500
+    },
+    {
+      "epoch": 0.04501,
+      "grad_norm": 0.8718373099791662,
+      "learning_rate": 0.003,
+      "loss": 4.0759,
+      "step": 4501
+    },
+    {
+      "epoch": 0.04502,
+      "grad_norm": 0.9228626903708667,
+      "learning_rate": 0.003,
+      "loss": 4.116,
+      "step": 4502
+    },
+    {
+      "epoch": 0.04503,
+      "grad_norm": 0.8263232740111328,
+      "learning_rate": 0.003,
+      "loss": 4.1153,
+      "step": 4503
+    },
+    {
+      "epoch": 0.04504,
+      "grad_norm": 0.7734875104374885,
+      "learning_rate": 0.003,
+      "loss": 4.1093,
+      "step": 4504
+    },
+    {
+      "epoch": 0.04505,
+      "grad_norm": 0.7885987258017754,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 4505
+    },
+    {
+      "epoch": 0.04506,
+      "grad_norm": 0.9407227338991642,
+      "learning_rate": 0.003,
+      "loss": 4.0866,
+      "step": 4506
+    },
+    {
+      "epoch": 0.04507,
+      "grad_norm": 1.0832398695950372,
+      "learning_rate": 0.003,
+      "loss": 4.067,
+      "step": 4507
+    },
+    {
+      "epoch": 0.04508,
+      "grad_norm": 0.9418456435744189,
+      "learning_rate": 0.003,
+      "loss": 4.0834,
+      "step": 4508
+    },
+    {
+      "epoch": 0.04509,
+      "grad_norm": 0.9175750111895549,
+      "learning_rate": 0.003,
+      "loss": 4.1085,
+      "step": 4509
+    },
+    {
+      "epoch": 0.0451,
+      "grad_norm": 0.9398070401461067,
+      "learning_rate": 0.003,
+      "loss": 4.1109,
+      "step": 4510
+    },
+    {
+      "epoch": 0.04511,
+      "grad_norm": 0.9217534503191119,
+      "learning_rate": 0.003,
+      "loss": 4.0934,
+      "step": 4511
+    },
+    {
+      "epoch": 0.04512,
+      "grad_norm": 1.0427993141187701,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 4512
+    },
+    {
+      "epoch": 0.04513,
+      "grad_norm": 0.8428325423508638,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 4513
+    },
+    {
+      "epoch": 0.04514,
+      "grad_norm": 1.0089526014399501,
+      "learning_rate": 0.003,
+      "loss": 4.0941,
+      "step": 4514
+    },
+    {
+      "epoch": 0.04515,
+      "grad_norm": 0.9789233122430068,
+      "learning_rate": 0.003,
+      "loss": 4.0893,
+      "step": 4515
+    },
+    {
+      "epoch": 0.04516,
+      "grad_norm": 0.9091489043065707,
+      "learning_rate": 0.003,
+      "loss": 4.0965,
+      "step": 4516
+    },
+    {
+      "epoch": 0.04517,
+      "grad_norm": 0.9751493943869993,
+      "learning_rate": 0.003,
+      "loss": 4.1139,
+      "step": 4517
+    },
+    {
+      "epoch": 0.04518,
+      "grad_norm": 0.8917249628104398,
+      "learning_rate": 0.003,
+      "loss": 4.0877,
+      "step": 4518
+    },
+    {
+      "epoch": 0.04519,
+      "grad_norm": 1.004402772465955,
+      "learning_rate": 0.003,
+      "loss": 4.0835,
+      "step": 4519
+    },
+    {
+      "epoch": 0.0452,
+      "grad_norm": 1.2586466593520984,
+      "learning_rate": 0.003,
+      "loss": 4.0953,
+      "step": 4520
+    },
+    {
+      "epoch": 0.04521,
+      "grad_norm": 0.9330729244910515,
+      "learning_rate": 0.003,
+      "loss": 4.0815,
+      "step": 4521
+    },
+    {
+      "epoch": 0.04522,
+      "grad_norm": 0.8641019927023269,
+      "learning_rate": 0.003,
+      "loss": 4.0767,
+      "step": 4522
+    },
+    {
+      "epoch": 0.04523,
+      "grad_norm": 0.9140037813457487,
+      "learning_rate": 0.003,
+      "loss": 4.0813,
+      "step": 4523
+    },
+    {
+      "epoch": 0.04524,
+      "grad_norm": 0.8919093436216252,
+      "learning_rate": 0.003,
+      "loss": 4.0586,
+      "step": 4524
+    },
+    {
+      "epoch": 0.04525,
+      "grad_norm": 1.0121384333442254,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 4525
+    },
+    {
+      "epoch": 0.04526,
+      "grad_norm": 1.1210191689178208,
+      "learning_rate": 0.003,
+      "loss": 4.1035,
+      "step": 4526
+    },
+    {
+      "epoch": 0.04527,
+      "grad_norm": 0.682641155436752,
+      "learning_rate": 0.003,
+      "loss": 4.0755,
+      "step": 4527
+    },
+    {
+      "epoch": 0.04528,
+      "grad_norm": 0.6585540834263701,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 4528
+    },
+    {
+      "epoch": 0.04529,
+      "grad_norm": 0.7678794616279656,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 4529
+    },
+    {
+      "epoch": 0.0453,
+      "grad_norm": 0.8419648665385003,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 4530
+    },
+    {
+      "epoch": 0.04531,
+      "grad_norm": 0.9205297479236283,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 4531
+    },
+    {
+      "epoch": 0.04532,
+      "grad_norm": 0.8853526961425466,
+      "learning_rate": 0.003,
+      "loss": 4.088,
+      "step": 4532
+    },
+    {
+      "epoch": 0.04533,
+      "grad_norm": 0.8992375264745746,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 4533
+    },
+    {
+      "epoch": 0.04534,
+      "grad_norm": 0.9811346625863182,
+      "learning_rate": 0.003,
+      "loss": 4.0986,
+      "step": 4534
+    },
+    {
+      "epoch": 0.04535,
+      "grad_norm": 1.037436568279496,
+      "learning_rate": 0.003,
+      "loss": 4.1191,
+      "step": 4535
+    },
+    {
+      "epoch": 0.04536,
+      "grad_norm": 0.8288199794219244,
+      "learning_rate": 0.003,
+      "loss": 4.1116,
+      "step": 4536
+    },
+    {
+      "epoch": 0.04537,
+      "grad_norm": 0.8088616928239108,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 4537
+    },
+    {
+      "epoch": 0.04538,
+      "grad_norm": 0.7318781047913877,
+      "learning_rate": 0.003,
+      "loss": 4.0888,
+      "step": 4538
+    },
+    {
+      "epoch": 0.04539,
+      "grad_norm": 0.7179435097962413,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 4539
+    },
+    {
+      "epoch": 0.0454,
+      "grad_norm": 0.8058129172445117,
+      "learning_rate": 0.003,
+      "loss": 4.0624,
+      "step": 4540
+    },
+    {
+      "epoch": 0.04541,
+      "grad_norm": 0.9730325188123338,
+      "learning_rate": 0.003,
+      "loss": 4.1112,
+      "step": 4541
+    },
+    {
+      "epoch": 0.04542,
+      "grad_norm": 1.1629088900069842,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 4542
+    },
+    {
+      "epoch": 0.04543,
+      "grad_norm": 1.0223952095742048,
+      "learning_rate": 0.003,
+      "loss": 4.0942,
+      "step": 4543
+    },
+    {
+      "epoch": 0.04544,
+      "grad_norm": 0.9007892884805351,
+      "learning_rate": 0.003,
+      "loss": 4.0719,
+      "step": 4544
+    },
+    {
+      "epoch": 0.04545,
+      "grad_norm": 0.8810595549441049,
+      "learning_rate": 0.003,
+      "loss": 4.0834,
+      "step": 4545
+    },
+    {
+      "epoch": 0.04546,
+      "grad_norm": 1.0706790887149014,
+      "learning_rate": 0.003,
+      "loss": 4.0718,
+      "step": 4546
+    },
+    {
+      "epoch": 0.04547,
+      "grad_norm": 1.0783793172148153,
+      "learning_rate": 0.003,
+      "loss": 4.0792,
+      "step": 4547
+    },
+    {
+      "epoch": 0.04548,
+      "grad_norm": 0.8754970851766884,
+      "learning_rate": 0.003,
+      "loss": 4.0879,
+      "step": 4548
+    },
+    {
+      "epoch": 0.04549,
+      "grad_norm": 1.006047833218977,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 4549
+    },
+    {
+      "epoch": 0.0455,
+      "grad_norm": 1.0770476162131184,
+      "learning_rate": 0.003,
+      "loss": 4.1161,
+      "step": 4550
+    },
+    {
+      "epoch": 0.04551,
+      "grad_norm": 0.9601215888010274,
+      "learning_rate": 0.003,
+      "loss": 4.1242,
+      "step": 4551
+    },
+    {
+      "epoch": 0.04552,
+      "grad_norm": 0.9774413017455548,
+      "learning_rate": 0.003,
+      "loss": 4.111,
+      "step": 4552
+    },
+    {
+      "epoch": 0.04553,
+      "grad_norm": 1.0213364834049192,
+      "learning_rate": 0.003,
+      "loss": 4.0959,
+      "step": 4553
+    },
+    {
+      "epoch": 0.04554,
+      "grad_norm": 0.9568143904158197,
+      "learning_rate": 0.003,
+      "loss": 4.0819,
+      "step": 4554
+    },
+    {
+      "epoch": 0.04555,
+      "grad_norm": 0.7973060747821642,
+      "learning_rate": 0.003,
+      "loss": 4.0881,
+      "step": 4555
+    },
+    {
+      "epoch": 0.04556,
+      "grad_norm": 0.9299995488489511,
+      "learning_rate": 0.003,
+      "loss": 4.0943,
+      "step": 4556
+    },
+    {
+      "epoch": 0.04557,
+      "grad_norm": 1.1293974930486246,
+      "learning_rate": 0.003,
+      "loss": 4.0845,
+      "step": 4557
+    },
+    {
+      "epoch": 0.04558,
+      "grad_norm": 1.026675541394777,
+      "learning_rate": 0.003,
+      "loss": 4.1108,
+      "step": 4558
+    },
+    {
+      "epoch": 0.04559,
+      "grad_norm": 1.0345981426986643,
+      "learning_rate": 0.003,
+      "loss": 4.1249,
+      "step": 4559
+    },
+    {
+      "epoch": 0.0456,
+      "grad_norm": 1.1305484499753016,
+      "learning_rate": 0.003,
+      "loss": 4.0854,
+      "step": 4560
+    },
+    {
+      "epoch": 0.04561,
+      "grad_norm": 0.925988269592052,
+      "learning_rate": 0.003,
+      "loss": 4.0913,
+      "step": 4561
+    },
+    {
+      "epoch": 0.04562,
+      "grad_norm": 0.8510922703082149,
+      "learning_rate": 0.003,
+      "loss": 4.1115,
+      "step": 4562
+    },
+    {
+      "epoch": 0.04563,
+      "grad_norm": 0.9226408925609122,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 4563
+    },
+    {
+      "epoch": 0.04564,
+      "grad_norm": 1.0130794105331695,
+      "learning_rate": 0.003,
+      "loss": 4.0952,
+      "step": 4564
+    },
+    {
+      "epoch": 0.04565,
+      "grad_norm": 1.1969435548126093,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 4565
+    },
+    {
+      "epoch": 0.04566,
+      "grad_norm": 0.9275288015749137,
+      "learning_rate": 0.003,
+      "loss": 4.1017,
+      "step": 4566
+    },
+    {
+      "epoch": 0.04567,
+      "grad_norm": 0.8651130584077846,
+      "learning_rate": 0.003,
+      "loss": 4.0909,
+      "step": 4567
+    },
+    {
+      "epoch": 0.04568,
+      "grad_norm": 0.8986618248546338,
+      "learning_rate": 0.003,
+      "loss": 4.0685,
+      "step": 4568
+    },
+    {
+      "epoch": 0.04569,
+      "grad_norm": 0.6837244864462902,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 4569
+    },
+    {
+      "epoch": 0.0457,
+      "grad_norm": 0.6768551414670718,
+      "learning_rate": 0.003,
+      "loss": 4.0781,
+      "step": 4570
+    },
+    {
+      "epoch": 0.04571,
+      "grad_norm": 0.7428397016306537,
+      "learning_rate": 0.003,
+      "loss": 4.107,
+      "step": 4571
+    },
+    {
+      "epoch": 0.04572,
+      "grad_norm": 0.8995755995048413,
+      "learning_rate": 0.003,
+      "loss": 4.0813,
+      "step": 4572
+    },
+    {
+      "epoch": 0.04573,
+      "grad_norm": 0.9009642352606402,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 4573
+    },
+    {
+      "epoch": 0.04574,
+      "grad_norm": 0.7114211134538346,
+      "learning_rate": 0.003,
+      "loss": 4.0634,
+      "step": 4574
+    },
+    {
+      "epoch": 0.04575,
+      "grad_norm": 0.6599729401731194,
+      "learning_rate": 0.003,
+      "loss": 4.0868,
+      "step": 4575
+    },
+    {
+      "epoch": 0.04576,
+      "grad_norm": 0.7597125240743299,
+      "learning_rate": 0.003,
+      "loss": 4.0805,
+      "step": 4576
+    },
+    {
+      "epoch": 0.04577,
+      "grad_norm": 0.8421667189567135,
+      "learning_rate": 0.003,
+      "loss": 4.1001,
+      "step": 4577
+    },
+    {
+      "epoch": 0.04578,
+      "grad_norm": 0.8104388984795634,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 4578
+    },
+    {
+      "epoch": 0.04579,
+      "grad_norm": 0.8976119076785662,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 4579
+    },
+    {
+      "epoch": 0.0458,
+      "grad_norm": 0.9887238152154557,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 4580
+    },
+    {
+      "epoch": 0.04581,
+      "grad_norm": 1.0944325064747162,
+      "learning_rate": 0.003,
+      "loss": 4.1047,
+      "step": 4581
+    },
+    {
+      "epoch": 0.04582,
+      "grad_norm": 1.1228836648844638,
+      "learning_rate": 0.003,
+      "loss": 4.1083,
+      "step": 4582
+    },
+    {
+      "epoch": 0.04583,
+      "grad_norm": 1.037982851445284,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 4583
+    },
+    {
+      "epoch": 0.04584,
+      "grad_norm": 1.1309942180537078,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 4584
+    },
+    {
+      "epoch": 0.04585,
+      "grad_norm": 0.8719784933921241,
+      "learning_rate": 0.003,
+      "loss": 4.0785,
+      "step": 4585
+    },
+    {
+      "epoch": 0.04586,
+      "grad_norm": 0.9988395861483548,
+      "learning_rate": 0.003,
+      "loss": 4.0882,
+      "step": 4586
+    },
+    {
+      "epoch": 0.04587,
+      "grad_norm": 1.1757119262154812,
+      "learning_rate": 0.003,
+      "loss": 4.1107,
+      "step": 4587
+    },
+    {
+      "epoch": 0.04588,
+      "grad_norm": 0.9935517321787154,
+      "learning_rate": 0.003,
+      "loss": 4.106,
+      "step": 4588
+    },
+    {
+      "epoch": 0.04589,
+      "grad_norm": 0.9699416826899608,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 4589
+    },
+    {
+      "epoch": 0.0459,
+      "grad_norm": 0.8608811328558719,
+      "learning_rate": 0.003,
+      "loss": 4.0949,
+      "step": 4590
+    },
+    {
+      "epoch": 0.04591,
+      "grad_norm": 0.8189922265739307,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 4591
+    },
+    {
+      "epoch": 0.04592,
+      "grad_norm": 0.7122732264854349,
+      "learning_rate": 0.003,
+      "loss": 4.0811,
+      "step": 4592
+    },
+    {
+      "epoch": 0.04593,
+      "grad_norm": 0.6916533373498136,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 4593
+    },
+    {
+      "epoch": 0.04594,
+      "grad_norm": 0.7141399590985198,
+      "learning_rate": 0.003,
+      "loss": 4.0964,
+      "step": 4594
+    },
+    {
+      "epoch": 0.04595,
+      "grad_norm": 0.5874937539781525,
+      "learning_rate": 0.003,
+      "loss": 4.0797,
+      "step": 4595
+    },
+    {
+      "epoch": 0.04596,
+      "grad_norm": 0.5901031684112441,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 4596
+    },
+    {
+      "epoch": 0.04597,
+      "grad_norm": 0.6145935292761844,
+      "learning_rate": 0.003,
+      "loss": 4.0849,
+      "step": 4597
+    },
+    {
+      "epoch": 0.04598,
+      "grad_norm": 0.6900732502328394,
+      "learning_rate": 0.003,
+      "loss": 4.1197,
+      "step": 4598
+    },
+    {
+      "epoch": 0.04599,
+      "grad_norm": 0.7532465802179442,
+      "learning_rate": 0.003,
+      "loss": 4.0664,
+      "step": 4599
+    },
+    {
+      "epoch": 0.046,
+      "grad_norm": 0.7204851942827531,
+      "learning_rate": 0.003,
+      "loss": 4.0796,
+      "step": 4600
+    },
+    {
+      "epoch": 0.04601,
+      "grad_norm": 0.8000251382960614,
+      "learning_rate": 0.003,
+      "loss": 4.0849,
+      "step": 4601
+    },
+    {
+      "epoch": 0.04602,
+      "grad_norm": 0.9181816267947781,
+      "learning_rate": 0.003,
+      "loss": 4.0829,
+      "step": 4602
+    },
+    {
+      "epoch": 0.04603,
+      "grad_norm": 1.1329954986924105,
+      "learning_rate": 0.003,
+      "loss": 4.1,
+      "step": 4603
+    },
+    {
+      "epoch": 0.04604,
+      "grad_norm": 0.9229244630258843,
+      "learning_rate": 0.003,
+      "loss": 4.0923,
+      "step": 4604
+    },
+    {
+      "epoch": 0.04605,
+      "grad_norm": 0.9397750901932713,
+      "learning_rate": 0.003,
+      "loss": 4.1063,
+      "step": 4605
+    },
+    {
+      "epoch": 0.04606,
+      "grad_norm": 0.9807853977592482,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 4606
+    },
+    {
+      "epoch": 0.04607,
+      "grad_norm": 1.0004066586342284,
+      "learning_rate": 0.003,
+      "loss": 4.0801,
+      "step": 4607
+    },
+    {
+      "epoch": 0.04608,
+      "grad_norm": 0.8365719223227974,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 4608
+    },
+    {
+      "epoch": 0.04609,
+      "grad_norm": 0.8004776907383365,
+      "learning_rate": 0.003,
+      "loss": 4.0825,
+      "step": 4609
+    },
+    {
+      "epoch": 0.0461,
+      "grad_norm": 1.0497099318779148,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 4610
+    },
+    {
+      "epoch": 0.04611,
+      "grad_norm": 1.2600264680618676,
+      "learning_rate": 0.003,
+      "loss": 4.1259,
+      "step": 4611
+    },
+    {
+      "epoch": 0.04612,
+      "grad_norm": 0.7894125524130393,
+      "learning_rate": 0.003,
+      "loss": 4.0945,
+      "step": 4612
+    },
+    {
+      "epoch": 0.04613,
+      "grad_norm": 0.8137383483366754,
+      "learning_rate": 0.003,
+      "loss": 4.0774,
+      "step": 4613
+    },
+    {
+      "epoch": 0.04614,
+      "grad_norm": 1.0780550037519088,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 4614
+    },
+    {
+      "epoch": 0.04615,
+      "grad_norm": 1.146445498524777,
+      "learning_rate": 0.003,
+      "loss": 4.0926,
+      "step": 4615
+    },
+    {
+      "epoch": 0.04616,
+      "grad_norm": 1.0658805778199005,
+      "learning_rate": 0.003,
+      "loss": 4.0745,
+      "step": 4616
+    },
+    {
+      "epoch": 0.04617,
+      "grad_norm": 1.0441620047388833,
+      "learning_rate": 0.003,
+      "loss": 4.0986,
+      "step": 4617
+    },
+    {
+      "epoch": 0.04618,
+      "grad_norm": 0.8556036446407603,
+      "learning_rate": 0.003,
+      "loss": 4.0758,
+      "step": 4618
+    },
+    {
+      "epoch": 0.04619,
+      "grad_norm": 0.8711936990908583,
+      "learning_rate": 0.003,
+      "loss": 4.0747,
+      "step": 4619
+    },
+    {
+      "epoch": 0.0462,
+      "grad_norm": 0.9909063580253566,
+      "learning_rate": 0.003,
+      "loss": 4.1061,
+      "step": 4620
+    },
+    {
+      "epoch": 0.04621,
+      "grad_norm": 1.0674313514316285,
+      "learning_rate": 0.003,
+      "loss": 4.118,
+      "step": 4621
+    },
+    {
+      "epoch": 0.04622,
+      "grad_norm": 0.9135451914376392,
+      "learning_rate": 0.003,
+      "loss": 4.1124,
+      "step": 4622
+    },
+    {
+      "epoch": 0.04623,
+      "grad_norm": 0.9086448162432442,
+      "learning_rate": 0.003,
+      "loss": 4.0951,
+      "step": 4623
+    },
+    {
+      "epoch": 0.04624,
+      "grad_norm": 0.929118113001063,
+      "learning_rate": 0.003,
+      "loss": 4.0728,
+      "step": 4624
+    },
+    {
+      "epoch": 0.04625,
+      "grad_norm": 0.8923285451003576,
+      "learning_rate": 0.003,
+      "loss": 4.0925,
+      "step": 4625
+    },
+    {
+      "epoch": 0.04626,
+      "grad_norm": 0.8550528854318953,
+      "learning_rate": 0.003,
+      "loss": 4.0966,
+      "step": 4626
+    },
+    {
+      "epoch": 0.04627,
+      "grad_norm": 0.8738286232684426,
+      "learning_rate": 0.003,
+      "loss": 4.0834,
+      "step": 4627
+    },
+    {
+      "epoch": 0.04628,
+      "grad_norm": 1.040219821170605,
+      "learning_rate": 0.003,
+      "loss": 4.1134,
+      "step": 4628
+    },
+    {
+      "epoch": 0.04629,
+      "grad_norm": 1.0457108285440184,
+      "learning_rate": 0.003,
+      "loss": 4.0893,
+      "step": 4629
+    },
+    {
+      "epoch": 0.0463,
+      "grad_norm": 1.0237213701996026,
+      "learning_rate": 0.003,
+      "loss": 4.1058,
+      "step": 4630
+    },
+    {
+      "epoch": 0.04631,
+      "grad_norm": 0.903960943241525,
+      "learning_rate": 0.003,
+      "loss": 4.084,
+      "step": 4631
+    },
+    {
+      "epoch": 0.04632,
+      "grad_norm": 0.848296335406008,
+      "learning_rate": 0.003,
+      "loss": 4.0734,
+      "step": 4632
+    },
+    {
+      "epoch": 0.04633,
+      "grad_norm": 0.9208369707573915,
+      "learning_rate": 0.003,
+      "loss": 4.0785,
+      "step": 4633
+    },
+    {
+      "epoch": 0.04634,
+      "grad_norm": 0.9324467970591866,
+      "learning_rate": 0.003,
+      "loss": 4.0824,
+      "step": 4634
+    },
+    {
+      "epoch": 0.04635,
+      "grad_norm": 1.1007094021276311,
+      "learning_rate": 0.003,
+      "loss": 4.0932,
+      "step": 4635
+    },
+    {
+      "epoch": 0.04636,
+      "grad_norm": 1.1761724983902668,
+      "learning_rate": 0.003,
+      "loss": 4.1293,
+      "step": 4636
+    },
+    {
+      "epoch": 0.04637,
+      "grad_norm": 0.8928241988529365,
+      "learning_rate": 0.003,
+      "loss": 4.0681,
+      "step": 4637
+    },
+    {
+      "epoch": 0.04638,
+      "grad_norm": 0.7909525696684022,
+      "learning_rate": 0.003,
+      "loss": 4.1013,
+      "step": 4638
+    },
+    {
+      "epoch": 0.04639,
+      "grad_norm": 0.8316115510623557,
+      "learning_rate": 0.003,
+      "loss": 4.0717,
+      "step": 4639
+    },
+    {
+      "epoch": 0.0464,
+      "grad_norm": 0.8890413282477022,
+      "learning_rate": 0.003,
+      "loss": 4.0938,
+      "step": 4640
+    },
+    {
+      "epoch": 0.04641,
+      "grad_norm": 0.9442456572250497,
+      "learning_rate": 0.003,
+      "loss": 4.1097,
+      "step": 4641
+    },
+    {
+      "epoch": 0.04642,
+      "grad_norm": 1.1131605186896782,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 4642
+    },
+    {
+      "epoch": 0.04643,
+      "grad_norm": 1.151049967466462,
+      "learning_rate": 0.003,
+      "loss": 4.0865,
+      "step": 4643
+    },
+    {
+      "epoch": 0.04644,
+      "grad_norm": 1.0077381059472987,
+      "learning_rate": 0.003,
+      "loss": 4.0905,
+      "step": 4644
+    },
+    {
+      "epoch": 0.04645,
+      "grad_norm": 0.8760793972978849,
+      "learning_rate": 0.003,
+      "loss": 4.0792,
+      "step": 4645
+    },
+    {
+      "epoch": 0.04646,
+      "grad_norm": 0.8512107506066473,
+      "learning_rate": 0.003,
+      "loss": 4.0771,
+      "step": 4646
+    },
+    {
+      "epoch": 0.04647,
+      "grad_norm": 0.8732029277276215,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 4647
+    },
+    {
+      "epoch": 0.04648,
+      "grad_norm": 0.9302078959957535,
+      "learning_rate": 0.003,
+      "loss": 4.0974,
+      "step": 4648
+    },
+    {
+      "epoch": 0.04649,
+      "grad_norm": 0.9169568810760613,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 4649
+    },
+    {
+      "epoch": 0.0465,
+      "grad_norm": 0.9303804967387479,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 4650
+    },
+    {
+      "epoch": 0.04651,
+      "grad_norm": 0.872676445058754,
+      "learning_rate": 0.003,
+      "loss": 4.0853,
+      "step": 4651
+    },
+    {
+      "epoch": 0.04652,
+      "grad_norm": 0.8018180143670698,
+      "learning_rate": 0.003,
+      "loss": 4.09,
+      "step": 4652
+    },
+    {
+      "epoch": 0.04653,
+      "grad_norm": 0.8064464814963113,
+      "learning_rate": 0.003,
+      "loss": 4.0896,
+      "step": 4653
+    },
+    {
+      "epoch": 0.04654,
+      "grad_norm": 0.8944906023744553,
+      "learning_rate": 0.003,
+      "loss": 4.0846,
+      "step": 4654
+    },
+    {
+      "epoch": 0.04655,
+      "grad_norm": 1.1219617714586472,
+      "learning_rate": 0.003,
+      "loss": 4.0955,
+      "step": 4655
+    },
+    {
+      "epoch": 0.04656,
+      "grad_norm": 1.0550396369369868,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 4656
+    },
+    {
+      "epoch": 0.04657,
+      "grad_norm": 1.0337811454324184,
+      "learning_rate": 0.003,
+      "loss": 4.1054,
+      "step": 4657
+    },
+    {
+      "epoch": 0.04658,
+      "grad_norm": 0.9390842279569726,
+      "learning_rate": 0.003,
+      "loss": 4.083,
+      "step": 4658
+    },
+    {
+      "epoch": 0.04659,
+      "grad_norm": 0.7870888779187648,
+      "learning_rate": 0.003,
+      "loss": 4.0752,
+      "step": 4659
+    },
+    {
+      "epoch": 0.0466,
+      "grad_norm": 0.7813047740300597,
+      "learning_rate": 0.003,
+      "loss": 4.0937,
+      "step": 4660
+    },
+    {
+      "epoch": 0.04661,
+      "grad_norm": 0.8185250491523086,
+      "learning_rate": 0.003,
+      "loss": 4.0796,
+      "step": 4661
+    },
+    {
+      "epoch": 0.04662,
+      "grad_norm": 0.7657744046442502,
+      "learning_rate": 0.003,
+      "loss": 4.0644,
+      "step": 4662
+    },
+    {
+      "epoch": 0.04663,
+      "grad_norm": 0.9612044009806899,
+      "learning_rate": 0.003,
+      "loss": 4.0885,
+      "step": 4663
+    },
+    {
+      "epoch": 0.04664,
+      "grad_norm": 1.091240235491955,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 4664
+    },
+    {
+      "epoch": 0.04665,
+      "grad_norm": 0.9496659607676349,
+      "learning_rate": 0.003,
+      "loss": 4.0652,
+      "step": 4665
+    },
+    {
+      "epoch": 0.04666,
+      "grad_norm": 0.8885966480506947,
+      "learning_rate": 0.003,
+      "loss": 4.0611,
+      "step": 4666
+    },
+    {
+      "epoch": 0.04667,
+      "grad_norm": 0.8146510839872884,
+      "learning_rate": 0.003,
+      "loss": 4.0644,
+      "step": 4667
+    },
+    {
+      "epoch": 0.04668,
+      "grad_norm": 0.7003726363991867,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 4668
+    },
+    {
+      "epoch": 0.04669,
+      "grad_norm": 0.6213740348621104,
+      "learning_rate": 0.003,
+      "loss": 4.0899,
+      "step": 4669
+    },
+    {
+      "epoch": 0.0467,
+      "grad_norm": 0.7033914078971897,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 4670
+    },
+    {
+      "epoch": 0.04671,
+      "grad_norm": 0.6956570433878502,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 4671
+    },
+    {
+      "epoch": 0.04672,
+      "grad_norm": 0.7790564265361141,
+      "learning_rate": 0.003,
+      "loss": 4.0834,
+      "step": 4672
+    },
+    {
+      "epoch": 0.04673,
+      "grad_norm": 0.8582519469717382,
+      "learning_rate": 0.003,
+      "loss": 4.0867,
+      "step": 4673
+    },
+    {
+      "epoch": 0.04674,
+      "grad_norm": 1.0257732379563222,
+      "learning_rate": 0.003,
+      "loss": 4.0598,
+      "step": 4674
+    },
+    {
+      "epoch": 0.04675,
+      "grad_norm": 1.0228753347895319,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 4675
+    },
+    {
+      "epoch": 0.04676,
+      "grad_norm": 1.091796013099298,
+      "learning_rate": 0.003,
+      "loss": 4.096,
+      "step": 4676
+    },
+    {
+      "epoch": 0.04677,
+      "grad_norm": 0.9874564467619078,
+      "learning_rate": 0.003,
+      "loss": 4.1002,
+      "step": 4677
+    },
+    {
+      "epoch": 0.04678,
+      "grad_norm": 0.9545956541830211,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 4678
+    },
+    {
+      "epoch": 0.04679,
+      "grad_norm": 0.9188392135996144,
+      "learning_rate": 0.003,
+      "loss": 4.0875,
+      "step": 4679
+    },
+    {
+      "epoch": 0.0468,
+      "grad_norm": 0.9976774725576849,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 4680
+    },
+    {
+      "epoch": 0.04681,
+      "grad_norm": 1.2926044572885032,
+      "learning_rate": 0.003,
+      "loss": 4.1224,
+      "step": 4681
+    },
+    {
+      "epoch": 0.04682,
+      "grad_norm": 0.8936432704892819,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 4682
+    },
+    {
+      "epoch": 0.04683,
+      "grad_norm": 0.9387765394338359,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 4683
+    },
+    {
+      "epoch": 0.04684,
+      "grad_norm": 1.0010607126157993,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 4684
+    },
+    {
+      "epoch": 0.04685,
+      "grad_norm": 1.1642588881453941,
+      "learning_rate": 0.003,
+      "loss": 4.0844,
+      "step": 4685
+    },
+    {
+      "epoch": 0.04686,
+      "grad_norm": 0.9177452996715223,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 4686
+    },
+    {
+      "epoch": 0.04687,
+      "grad_norm": 1.0845507224363737,
+      "learning_rate": 0.003,
+      "loss": 4.1267,
+      "step": 4687
+    },
+    {
+      "epoch": 0.04688,
+      "grad_norm": 1.0851979161146954,
+      "learning_rate": 0.003,
+      "loss": 4.1109,
+      "step": 4688
+    },
+    {
+      "epoch": 0.04689,
+      "grad_norm": 1.0323716074650593,
+      "learning_rate": 0.003,
+      "loss": 4.1251,
+      "step": 4689
+    },
+    {
+      "epoch": 0.0469,
+      "grad_norm": 0.8881158922609561,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 4690
+    },
+    {
+      "epoch": 0.04691,
+      "grad_norm": 0.7530974336635942,
+      "learning_rate": 0.003,
+      "loss": 4.0791,
+      "step": 4691
+    },
+    {
+      "epoch": 0.04692,
+      "grad_norm": 0.8251300897824291,
+      "learning_rate": 0.003,
+      "loss": 4.0807,
+      "step": 4692
+    },
+    {
+      "epoch": 0.04693,
+      "grad_norm": 1.1080787687675981,
+      "learning_rate": 0.003,
+      "loss": 4.1121,
+      "step": 4693
+    },
+    {
+      "epoch": 0.04694,
+      "grad_norm": 1.2008844146719992,
+      "learning_rate": 0.003,
+      "loss": 4.0831,
+      "step": 4694
+    },
+    {
+      "epoch": 0.04695,
+      "grad_norm": 0.8692807098125874,
+      "learning_rate": 0.003,
+      "loss": 4.0656,
+      "step": 4695
+    },
+    {
+      "epoch": 0.04696,
+      "grad_norm": 0.7273592465913997,
+      "learning_rate": 0.003,
+      "loss": 4.1023,
+      "step": 4696
+    },
+    {
+      "epoch": 0.04697,
+      "grad_norm": 0.791219049234071,
+      "learning_rate": 0.003,
+      "loss": 4.0875,
+      "step": 4697
+    },
+    {
+      "epoch": 0.04698,
+      "grad_norm": 0.8259619113948167,
+      "learning_rate": 0.003,
+      "loss": 4.0897,
+      "step": 4698
+    },
+    {
+      "epoch": 0.04699,
+      "grad_norm": 0.986873640288411,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 4699
+    },
+    {
+      "epoch": 0.047,
+      "grad_norm": 1.124829270421113,
+      "learning_rate": 0.003,
+      "loss": 4.103,
+      "step": 4700
+    },
+    {
+      "epoch": 0.04701,
+      "grad_norm": 0.8041528371793156,
+      "learning_rate": 0.003,
+      "loss": 4.1021,
+      "step": 4701
+    },
+    {
+      "epoch": 0.04702,
+      "grad_norm": 0.9002565040000517,
+      "learning_rate": 0.003,
+      "loss": 4.0976,
+      "step": 4702
+    },
+    {
+      "epoch": 0.04703,
+      "grad_norm": 0.928884750922695,
+      "learning_rate": 0.003,
+      "loss": 4.0859,
+      "step": 4703
+    },
+    {
+      "epoch": 0.04704,
+      "grad_norm": 0.9517910705080795,
+      "learning_rate": 0.003,
+      "loss": 4.0778,
+      "step": 4704
+    },
+    {
+      "epoch": 0.04705,
+      "grad_norm": 1.0793630137006167,
+      "learning_rate": 0.003,
+      "loss": 4.1135,
+      "step": 4705
+    },
+    {
+      "epoch": 0.04706,
+      "grad_norm": 0.8932410896556339,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 4706
+    },
+    {
+      "epoch": 0.04707,
+      "grad_norm": 0.8273663973299694,
+      "learning_rate": 0.003,
+      "loss": 4.0908,
+      "step": 4707
+    },
+    {
+      "epoch": 0.04708,
+      "grad_norm": 0.7707848347191034,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 4708
+    },
+    {
+      "epoch": 0.04709,
+      "grad_norm": 0.6881441521153875,
+      "learning_rate": 0.003,
+      "loss": 4.0731,
+      "step": 4709
+    },
+    {
+      "epoch": 0.0471,
+      "grad_norm": 0.714094436830912,
+      "learning_rate": 0.003,
+      "loss": 4.0848,
+      "step": 4710
+    },
+    {
+      "epoch": 0.04711,
+      "grad_norm": 0.8507678879908461,
+      "learning_rate": 0.003,
+      "loss": 4.0801,
+      "step": 4711
+    },
+    {
+      "epoch": 0.04712,
+      "grad_norm": 0.8931516416205477,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 4712
+    },
+    {
+      "epoch": 0.04713,
+      "grad_norm": 0.9279444773212077,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 4713
+    },
+    {
+      "epoch": 0.04714,
+      "grad_norm": 0.7844617012393187,
+      "learning_rate": 0.003,
+      "loss": 4.0749,
+      "step": 4714
+    },
+    {
+      "epoch": 0.04715,
+      "grad_norm": 0.7834150113825615,
+      "learning_rate": 0.003,
+      "loss": 4.1002,
+      "step": 4715
+    },
+    {
+      "epoch": 0.04716,
+      "grad_norm": 0.7743590099580365,
+      "learning_rate": 0.003,
+      "loss": 4.0714,
+      "step": 4716
+    },
+    {
+      "epoch": 0.04717,
+      "grad_norm": 0.7709401253853543,
+      "learning_rate": 0.003,
+      "loss": 4.0883,
+      "step": 4717
+    },
+    {
+      "epoch": 0.04718,
+      "grad_norm": 0.8235615104288091,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 4718
+    },
+    {
+      "epoch": 0.04719,
+      "grad_norm": 0.8708271313882437,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 4719
+    },
+    {
+      "epoch": 0.0472,
+      "grad_norm": 0.8040720392649453,
+      "learning_rate": 0.003,
+      "loss": 4.0944,
+      "step": 4720
+    },
+    {
+      "epoch": 0.04721,
+      "grad_norm": 0.7637937594189287,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 4721
+    },
+    {
+      "epoch": 0.04722,
+      "grad_norm": 0.7154347513877589,
+      "learning_rate": 0.003,
+      "loss": 4.069,
+      "step": 4722
+    },
+    {
+      "epoch": 0.04723,
+      "grad_norm": 0.8612068842939984,
+      "learning_rate": 0.003,
+      "loss": 4.0811,
+      "step": 4723
+    },
+    {
+      "epoch": 0.04724,
+      "grad_norm": 0.9614536713142897,
+      "learning_rate": 0.003,
+      "loss": 4.0598,
+      "step": 4724
+    },
+    {
+      "epoch": 0.04725,
+      "grad_norm": 1.0189566497638871,
+      "learning_rate": 0.003,
+      "loss": 4.0874,
+      "step": 4725
+    },
+    {
+      "epoch": 0.04726,
+      "grad_norm": 1.0839491352411363,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 4726
+    },
+    {
+      "epoch": 0.04727,
+      "grad_norm": 1.0649373566906404,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 4727
+    },
+    {
+      "epoch": 0.04728,
+      "grad_norm": 0.867382918842643,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 4728
+    },
+    {
+      "epoch": 0.04729,
+      "grad_norm": 0.8648790898500417,
+      "learning_rate": 0.003,
+      "loss": 4.0871,
+      "step": 4729
+    },
+    {
+      "epoch": 0.0473,
+      "grad_norm": 0.8269978275529186,
+      "learning_rate": 0.003,
+      "loss": 4.0814,
+      "step": 4730
+    },
+    {
+      "epoch": 0.04731,
+      "grad_norm": 0.8941716511884031,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 4731
+    },
+    {
+      "epoch": 0.04732,
+      "grad_norm": 0.9280984555504275,
+      "learning_rate": 0.003,
+      "loss": 4.0911,
+      "step": 4732
+    },
+    {
+      "epoch": 0.04733,
+      "grad_norm": 1.147397771954512,
+      "learning_rate": 0.003,
+      "loss": 4.0877,
+      "step": 4733
+    },
+    {
+      "epoch": 0.04734,
+      "grad_norm": 0.9874050482010743,
+      "learning_rate": 0.003,
+      "loss": 4.0883,
+      "step": 4734
+    },
+    {
+      "epoch": 0.04735,
+      "grad_norm": 1.2239662812431078,
+      "learning_rate": 0.003,
+      "loss": 4.114,
+      "step": 4735
+    },
+    {
+      "epoch": 0.04736,
+      "grad_norm": 0.9512556578156451,
+      "learning_rate": 0.003,
+      "loss": 4.0925,
+      "step": 4736
+    },
+    {
+      "epoch": 0.04737,
+      "grad_norm": 1.1383559521224216,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 4737
+    },
+    {
+      "epoch": 0.04738,
+      "grad_norm": 0.9981707221550351,
+      "learning_rate": 0.003,
+      "loss": 4.1033,
+      "step": 4738
+    },
+    {
+      "epoch": 0.04739,
+      "grad_norm": 0.9631032735800266,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 4739
+    },
+    {
+      "epoch": 0.0474,
+      "grad_norm": 0.8943309500832445,
+      "learning_rate": 0.003,
+      "loss": 4.1061,
+      "step": 4740
+    },
+    {
+      "epoch": 0.04741,
+      "grad_norm": 0.9142912447416025,
+      "learning_rate": 0.003,
+      "loss": 4.0967,
+      "step": 4741
+    },
+    {
+      "epoch": 0.04742,
+      "grad_norm": 0.9095935688180709,
+      "learning_rate": 0.003,
+      "loss": 4.0768,
+      "step": 4742
+    },
+    {
+      "epoch": 0.04743,
+      "grad_norm": 0.9621787693657161,
+      "learning_rate": 0.003,
+      "loss": 4.1005,
+      "step": 4743
+    },
+    {
+      "epoch": 0.04744,
+      "grad_norm": 0.983730502100782,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 4744
+    },
+    {
+      "epoch": 0.04745,
+      "grad_norm": 1.0519698186730737,
+      "learning_rate": 0.003,
+      "loss": 4.0701,
+      "step": 4745
+    },
+    {
+      "epoch": 0.04746,
+      "grad_norm": 1.0692770481585765,
+      "learning_rate": 0.003,
+      "loss": 4.0901,
+      "step": 4746
+    },
+    {
+      "epoch": 0.04747,
+      "grad_norm": 0.9800763430595273,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 4747
+    },
+    {
+      "epoch": 0.04748,
+      "grad_norm": 1.0447027226711574,
+      "learning_rate": 0.003,
+      "loss": 4.1131,
+      "step": 4748
+    },
+    {
+      "epoch": 0.04749,
+      "grad_norm": 0.9780745361519866,
+      "learning_rate": 0.003,
+      "loss": 4.1178,
+      "step": 4749
+    },
+    {
+      "epoch": 0.0475,
+      "grad_norm": 1.1823864036419054,
+      "learning_rate": 0.003,
+      "loss": 4.0892,
+      "step": 4750
+    },
+    {
+      "epoch": 0.04751,
+      "grad_norm": 1.0317082296257587,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 4751
+    },
+    {
+      "epoch": 0.04752,
+      "grad_norm": 1.0191266508391366,
+      "learning_rate": 0.003,
+      "loss": 4.0841,
+      "step": 4752
+    },
+    {
+      "epoch": 0.04753,
+      "grad_norm": 0.9650995183204055,
+      "learning_rate": 0.003,
+      "loss": 4.0888,
+      "step": 4753
+    },
+    {
+      "epoch": 0.04754,
+      "grad_norm": 0.9626529774574477,
+      "learning_rate": 0.003,
+      "loss": 4.1079,
+      "step": 4754
+    },
+    {
+      "epoch": 0.04755,
+      "grad_norm": 1.011836432397673,
+      "learning_rate": 0.003,
+      "loss": 4.0896,
+      "step": 4755
+    },
+    {
+      "epoch": 0.04756,
+      "grad_norm": 0.9061088923081047,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 4756
+    },
+    {
+      "epoch": 0.04757,
+      "grad_norm": 0.8653623623227834,
+      "learning_rate": 0.003,
+      "loss": 4.0833,
+      "step": 4757
+    },
+    {
+      "epoch": 0.04758,
+      "grad_norm": 0.8875207606926206,
+      "learning_rate": 0.003,
+      "loss": 4.0762,
+      "step": 4758
+    },
+    {
+      "epoch": 0.04759,
+      "grad_norm": 0.8817482562664115,
+      "learning_rate": 0.003,
+      "loss": 4.0888,
+      "step": 4759
+    },
+    {
+      "epoch": 0.0476,
+      "grad_norm": 0.7350149992314764,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 4760
+    },
+    {
+      "epoch": 0.04761,
+      "grad_norm": 0.6886778817907339,
+      "learning_rate": 0.003,
+      "loss": 4.0696,
+      "step": 4761
+    },
+    {
+      "epoch": 0.04762,
+      "grad_norm": 0.6706585457203895,
+      "learning_rate": 0.003,
+      "loss": 4.0639,
+      "step": 4762
+    },
+    {
+      "epoch": 0.04763,
+      "grad_norm": 0.7887867887553318,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 4763
+    },
+    {
+      "epoch": 0.04764,
+      "grad_norm": 0.97596832551055,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 4764
+    },
+    {
+      "epoch": 0.04765,
+      "grad_norm": 1.2783386527166918,
+      "learning_rate": 0.003,
+      "loss": 4.1179,
+      "step": 4765
+    },
+    {
+      "epoch": 0.04766,
+      "grad_norm": 0.87011816316084,
+      "learning_rate": 0.003,
+      "loss": 4.0816,
+      "step": 4766
+    },
+    {
+      "epoch": 0.04767,
+      "grad_norm": 0.995725590420577,
+      "learning_rate": 0.003,
+      "loss": 4.0836,
+      "step": 4767
+    },
+    {
+      "epoch": 0.04768,
+      "grad_norm": 0.9157007548076511,
+      "learning_rate": 0.003,
+      "loss": 4.0762,
+      "step": 4768
+    },
+    {
+      "epoch": 0.04769,
+      "grad_norm": 0.7763424443251495,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 4769
+    },
+    {
+      "epoch": 0.0477,
+      "grad_norm": 0.9132161182733682,
+      "learning_rate": 0.003,
+      "loss": 4.0549,
+      "step": 4770
+    },
+    {
+      "epoch": 0.04771,
+      "grad_norm": 1.0690816936012133,
+      "learning_rate": 0.003,
+      "loss": 4.081,
+      "step": 4771
+    },
+    {
+      "epoch": 0.04772,
+      "grad_norm": 1.0183011941792959,
+      "learning_rate": 0.003,
+      "loss": 4.0843,
+      "step": 4772
+    },
+    {
+      "epoch": 0.04773,
+      "grad_norm": 0.9142174256967822,
+      "learning_rate": 0.003,
+      "loss": 4.1024,
+      "step": 4773
+    },
+    {
+      "epoch": 0.04774,
+      "grad_norm": 0.7603001306934604,
+      "learning_rate": 0.003,
+      "loss": 4.0826,
+      "step": 4774
+    },
+    {
+      "epoch": 0.04775,
+      "grad_norm": 0.8346482465320779,
+      "learning_rate": 0.003,
+      "loss": 4.0835,
+      "step": 4775
+    },
+    {
+      "epoch": 0.04776,
+      "grad_norm": 0.9773364899564101,
+      "learning_rate": 0.003,
+      "loss": 4.0729,
+      "step": 4776
+    },
+    {
+      "epoch": 0.04777,
+      "grad_norm": 1.0784655700916257,
+      "learning_rate": 0.003,
+      "loss": 4.0746,
+      "step": 4777
+    },
+    {
+      "epoch": 0.04778,
+      "grad_norm": 0.9412592454712865,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 4778
+    },
+    {
+      "epoch": 0.04779,
+      "grad_norm": 1.0145035098418551,
+      "learning_rate": 0.003,
+      "loss": 4.0918,
+      "step": 4779
+    },
+    {
+      "epoch": 0.0478,
+      "grad_norm": 1.0762920782151484,
+      "learning_rate": 0.003,
+      "loss": 4.0954,
+      "step": 4780
+    },
+    {
+      "epoch": 0.04781,
+      "grad_norm": 0.8651372390591274,
+      "learning_rate": 0.003,
+      "loss": 4.0699,
+      "step": 4781
+    },
+    {
+      "epoch": 0.04782,
+      "grad_norm": 0.8769680695281393,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 4782
+    },
+    {
+      "epoch": 0.04783,
+      "grad_norm": 0.9787359970897872,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 4783
+    },
+    {
+      "epoch": 0.04784,
+      "grad_norm": 0.9721858465952792,
+      "learning_rate": 0.003,
+      "loss": 4.0917,
+      "step": 4784
+    },
+    {
+      "epoch": 0.04785,
+      "grad_norm": 1.0635527582181876,
+      "learning_rate": 0.003,
+      "loss": 4.0951,
+      "step": 4785
+    },
+    {
+      "epoch": 0.04786,
+      "grad_norm": 0.9396709467834784,
+      "learning_rate": 0.003,
+      "loss": 4.0965,
+      "step": 4786
+    },
+    {
+      "epoch": 0.04787,
+      "grad_norm": 1.00148310702931,
+      "learning_rate": 0.003,
+      "loss": 4.0914,
+      "step": 4787
+    },
+    {
+      "epoch": 0.04788,
+      "grad_norm": 0.8743917541049051,
+      "learning_rate": 0.003,
+      "loss": 4.0745,
+      "step": 4788
+    },
+    {
+      "epoch": 0.04789,
+      "grad_norm": 0.7884230141024988,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 4789
+    },
+    {
+      "epoch": 0.0479,
+      "grad_norm": 0.8498046311242973,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 4790
+    },
+    {
+      "epoch": 0.04791,
+      "grad_norm": 0.956319683384673,
+      "learning_rate": 0.003,
+      "loss": 4.095,
+      "step": 4791
+    },
+    {
+      "epoch": 0.04792,
+      "grad_norm": 1.228255623374456,
+      "learning_rate": 0.003,
+      "loss": 4.1159,
+      "step": 4792
+    },
+    {
+      "epoch": 0.04793,
+      "grad_norm": 0.9294780478699636,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 4793
+    },
+    {
+      "epoch": 0.04794,
+      "grad_norm": 0.9672358732016223,
+      "learning_rate": 0.003,
+      "loss": 4.0907,
+      "step": 4794
+    },
+    {
+      "epoch": 0.04795,
+      "grad_norm": 1.0525050179084454,
+      "learning_rate": 0.003,
+      "loss": 4.0955,
+      "step": 4795
+    },
+    {
+      "epoch": 0.04796,
+      "grad_norm": 1.1544771283976443,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 4796
+    },
+    {
+      "epoch": 0.04797,
+      "grad_norm": 0.915501476651925,
+      "learning_rate": 0.003,
+      "loss": 4.0978,
+      "step": 4797
+    },
+    {
+      "epoch": 0.04798,
+      "grad_norm": 0.8655114100315004,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 4798
+    },
+    {
+      "epoch": 0.04799,
+      "grad_norm": 0.9840921488311046,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 4799
+    },
+    {
+      "epoch": 0.048,
+      "grad_norm": 0.9567911011323519,
+      "learning_rate": 0.003,
+      "loss": 4.0744,
+      "step": 4800
+    },
+    {
+      "epoch": 0.04801,
+      "grad_norm": 1.0260566498791253,
+      "learning_rate": 0.003,
+      "loss": 4.0849,
+      "step": 4801
+    },
+    {
+      "epoch": 0.04802,
+      "grad_norm": 0.9765671371087857,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 4802
+    },
+    {
+      "epoch": 0.04803,
+      "grad_norm": 0.8954590032392303,
+      "learning_rate": 0.003,
+      "loss": 4.067,
+      "step": 4803
+    },
+    {
+      "epoch": 0.04804,
+      "grad_norm": 0.9397394535107887,
+      "learning_rate": 0.003,
+      "loss": 4.1015,
+      "step": 4804
+    },
+    {
+      "epoch": 0.04805,
+      "grad_norm": 0.8330259017268677,
+      "learning_rate": 0.003,
+      "loss": 4.0977,
+      "step": 4805
+    },
+    {
+      "epoch": 0.04806,
+      "grad_norm": 0.8897845335382429,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 4806
+    },
+    {
+      "epoch": 0.04807,
+      "grad_norm": 0.8951815464379674,
+      "learning_rate": 0.003,
+      "loss": 4.0989,
+      "step": 4807
+    },
+    {
+      "epoch": 0.04808,
+      "grad_norm": 0.8091055618521581,
+      "learning_rate": 0.003,
+      "loss": 4.0753,
+      "step": 4808
+    },
+    {
+      "epoch": 0.04809,
+      "grad_norm": 0.90919679772142,
+      "learning_rate": 0.003,
+      "loss": 4.0697,
+      "step": 4809
+    },
+    {
+      "epoch": 0.0481,
+      "grad_norm": 1.0315510358211395,
+      "learning_rate": 0.003,
+      "loss": 4.0975,
+      "step": 4810
+    },
+    {
+      "epoch": 0.04811,
+      "grad_norm": 1.0721233459919157,
+      "learning_rate": 0.003,
+      "loss": 4.0916,
+      "step": 4811
+    },
+    {
+      "epoch": 0.04812,
+      "grad_norm": 1.0618859760248345,
+      "learning_rate": 0.003,
+      "loss": 4.0969,
+      "step": 4812
+    },
+    {
+      "epoch": 0.04813,
+      "grad_norm": 0.9313083894483718,
+      "learning_rate": 0.003,
+      "loss": 4.08,
+      "step": 4813
+    },
+    {
+      "epoch": 0.04814,
+      "grad_norm": 0.8584027305263373,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 4814
+    },
+    {
+      "epoch": 0.04815,
+      "grad_norm": 0.8633671828950363,
+      "learning_rate": 0.003,
+      "loss": 4.0765,
+      "step": 4815
+    },
+    {
+      "epoch": 0.04816,
+      "grad_norm": 1.0445614739609703,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 4816
+    },
+    {
+      "epoch": 0.04817,
+      "grad_norm": 1.156557836130519,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 4817
+    },
+    {
+      "epoch": 0.04818,
+      "grad_norm": 0.9056636167430384,
+      "learning_rate": 0.003,
+      "loss": 4.0728,
+      "step": 4818
+    },
+    {
+      "epoch": 0.04819,
+      "grad_norm": 0.9644804670241944,
+      "learning_rate": 0.003,
+      "loss": 4.0807,
+      "step": 4819
+    },
+    {
+      "epoch": 0.0482,
+      "grad_norm": 0.8579108360881226,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 4820
+    },
+    {
+      "epoch": 0.04821,
+      "grad_norm": 0.738347738618665,
+      "learning_rate": 0.003,
+      "loss": 4.0927,
+      "step": 4821
+    },
+    {
+      "epoch": 0.04822,
+      "grad_norm": 0.77578338268237,
+      "learning_rate": 0.003,
+      "loss": 4.0683,
+      "step": 4822
+    },
+    {
+      "epoch": 0.04823,
+      "grad_norm": 0.7660224855432475,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 4823
+    },
+    {
+      "epoch": 0.04824,
+      "grad_norm": 0.7699061557992386,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 4824
+    },
+    {
+      "epoch": 0.04825,
+      "grad_norm": 0.9058101965573263,
+      "learning_rate": 0.003,
+      "loss": 4.072,
+      "step": 4825
+    },
+    {
+      "epoch": 0.04826,
+      "grad_norm": 1.1063436218286011,
+      "learning_rate": 0.003,
+      "loss": 4.083,
+      "step": 4826
+    },
+    {
+      "epoch": 0.04827,
+      "grad_norm": 0.8962605854230049,
+      "learning_rate": 0.003,
+      "loss": 4.0839,
+      "step": 4827
+    },
+    {
+      "epoch": 0.04828,
+      "grad_norm": 0.9401425903734211,
+      "learning_rate": 0.003,
+      "loss": 4.0922,
+      "step": 4828
+    },
+    {
+      "epoch": 0.04829,
+      "grad_norm": 1.008827045223568,
+      "learning_rate": 0.003,
+      "loss": 4.0779,
+      "step": 4829
+    },
+    {
+      "epoch": 0.0483,
+      "grad_norm": 1.1759354719434816,
+      "learning_rate": 0.003,
+      "loss": 4.094,
+      "step": 4830
+    },
+    {
+      "epoch": 0.04831,
+      "grad_norm": 0.9149475371771189,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 4831
+    },
+    {
+      "epoch": 0.04832,
+      "grad_norm": 0.9417806047622538,
+      "learning_rate": 0.003,
+      "loss": 4.0804,
+      "step": 4832
+    },
+    {
+      "epoch": 0.04833,
+      "grad_norm": 0.8568983398054711,
+      "learning_rate": 0.003,
+      "loss": 4.0802,
+      "step": 4833
+    },
+    {
+      "epoch": 0.04834,
+      "grad_norm": 0.9118080863225161,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 4834
+    },
+    {
+      "epoch": 0.04835,
+      "grad_norm": 0.8727113022874836,
+      "learning_rate": 0.003,
+      "loss": 4.091,
+      "step": 4835
+    },
+    {
+      "epoch": 0.04836,
+      "grad_norm": 0.7282176510987858,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 4836
+    },
+    {
+      "epoch": 0.04837,
+      "grad_norm": 0.8743286431817264,
+      "learning_rate": 0.003,
+      "loss": 4.0812,
+      "step": 4837
+    },
+    {
+      "epoch": 0.04838,
+      "grad_norm": 1.0886280382022706,
+      "learning_rate": 0.003,
+      "loss": 4.0982,
+      "step": 4838
+    },
+    {
+      "epoch": 0.04839,
+      "grad_norm": 0.9289173758415538,
+      "learning_rate": 0.003,
+      "loss": 4.0784,
+      "step": 4839
+    },
+    {
+      "epoch": 0.0484,
+      "grad_norm": 0.9689477229941422,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 4840
+    },
+    {
+      "epoch": 0.04841,
+      "grad_norm": 0.8968968888481653,
+      "learning_rate": 0.003,
+      "loss": 4.0773,
+      "step": 4841
+    },
+    {
+      "epoch": 0.04842,
+      "grad_norm": 0.7785478768547701,
+      "learning_rate": 0.003,
+      "loss": 4.0916,
+      "step": 4842
+    },
+    {
+      "epoch": 0.04843,
+      "grad_norm": 0.7603045256063694,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 4843
+    },
+    {
+      "epoch": 0.04844,
+      "grad_norm": 0.8780025270605276,
+      "learning_rate": 0.003,
+      "loss": 4.0815,
+      "step": 4844
+    },
+    {
+      "epoch": 0.04845,
+      "grad_norm": 0.9254852484657723,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 4845
+    },
+    {
+      "epoch": 0.04846,
+      "grad_norm": 0.8987509251388802,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 4846
+    },
+    {
+      "epoch": 0.04847,
+      "grad_norm": 0.8893955297373247,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 4847
+    },
+    {
+      "epoch": 0.04848,
+      "grad_norm": 0.8912302394312529,
+      "learning_rate": 0.003,
+      "loss": 4.0973,
+      "step": 4848
+    },
+    {
+      "epoch": 0.04849,
+      "grad_norm": 0.8726877737099056,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 4849
+    },
+    {
+      "epoch": 0.0485,
+      "grad_norm": 0.8314222830339668,
+      "learning_rate": 0.003,
+      "loss": 4.0765,
+      "step": 4850
+    },
+    {
+      "epoch": 0.04851,
+      "grad_norm": 0.7822732329238694,
+      "learning_rate": 0.003,
+      "loss": 4.0876,
+      "step": 4851
+    },
+    {
+      "epoch": 0.04852,
+      "grad_norm": 0.9092871934087126,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 4852
+    },
+    {
+      "epoch": 0.04853,
+      "grad_norm": 1.2348482197912367,
+      "learning_rate": 0.003,
+      "loss": 4.0946,
+      "step": 4853
+    },
+    {
+      "epoch": 0.04854,
+      "grad_norm": 0.8979894181749922,
+      "learning_rate": 0.003,
+      "loss": 4.0863,
+      "step": 4854
+    },
+    {
+      "epoch": 0.04855,
+      "grad_norm": 0.8934339928496606,
+      "learning_rate": 0.003,
+      "loss": 4.0953,
+      "step": 4855
+    },
+    {
+      "epoch": 0.04856,
+      "grad_norm": 1.0136848244881436,
+      "learning_rate": 0.003,
+      "loss": 4.095,
+      "step": 4856
+    },
+    {
+      "epoch": 0.04857,
+      "grad_norm": 1.2509395241639578,
+      "learning_rate": 0.003,
+      "loss": 4.1218,
+      "step": 4857
+    },
+    {
+      "epoch": 0.04858,
+      "grad_norm": 0.9372409373437549,
+      "learning_rate": 0.003,
+      "loss": 4.0756,
+      "step": 4858
+    },
+    {
+      "epoch": 0.04859,
+      "grad_norm": 0.9546537840113466,
+      "learning_rate": 0.003,
+      "loss": 4.1102,
+      "step": 4859
+    },
+    {
+      "epoch": 0.0486,
+      "grad_norm": 1.082853408289471,
+      "learning_rate": 0.003,
+      "loss": 4.093,
+      "step": 4860
+    },
+    {
+      "epoch": 0.04861,
+      "grad_norm": 1.01675025574686,
+      "learning_rate": 0.003,
+      "loss": 4.086,
+      "step": 4861
+    },
+    {
+      "epoch": 0.04862,
+      "grad_norm": 0.9176376091053398,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 4862
+    },
+    {
+      "epoch": 0.04863,
+      "grad_norm": 0.8568974528178754,
+      "learning_rate": 0.003,
+      "loss": 4.1013,
+      "step": 4863
+    },
+    {
+      "epoch": 0.04864,
+      "grad_norm": 0.8166420481645057,
+      "learning_rate": 0.003,
+      "loss": 4.0783,
+      "step": 4864
+    },
+    {
+      "epoch": 0.04865,
+      "grad_norm": 0.9234428400906274,
+      "learning_rate": 0.003,
+      "loss": 4.098,
+      "step": 4865
+    },
+    {
+      "epoch": 0.04866,
+      "grad_norm": 0.8738302868938767,
+      "learning_rate": 0.003,
+      "loss": 4.0999,
+      "step": 4866
+    },
+    {
+      "epoch": 0.04867,
+      "grad_norm": 0.85532860997535,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 4867
+    },
+    {
+      "epoch": 0.04868,
+      "grad_norm": 0.7330290515083526,
+      "learning_rate": 0.003,
+      "loss": 4.0776,
+      "step": 4868
+    },
+    {
+      "epoch": 0.04869,
+      "grad_norm": 0.7237705668329806,
+      "learning_rate": 0.003,
+      "loss": 4.0879,
+      "step": 4869
+    },
+    {
+      "epoch": 0.0487,
+      "grad_norm": 0.7073551331896761,
+      "learning_rate": 0.003,
+      "loss": 4.0749,
+      "step": 4870
+    },
+    {
+      "epoch": 0.04871,
+      "grad_norm": 0.7482228718063552,
+      "learning_rate": 0.003,
+      "loss": 4.094,
+      "step": 4871
+    },
+    {
+      "epoch": 0.04872,
+      "grad_norm": 0.7934599742401708,
+      "learning_rate": 0.003,
+      "loss": 4.06,
+      "step": 4872
+    },
+    {
+      "epoch": 0.04873,
+      "grad_norm": 0.8520994037973396,
+      "learning_rate": 0.003,
+      "loss": 4.0851,
+      "step": 4873
+    },
+    {
+      "epoch": 0.04874,
+      "grad_norm": 0.9436390490861155,
+      "learning_rate": 0.003,
+      "loss": 4.0796,
+      "step": 4874
+    },
+    {
+      "epoch": 0.04875,
+      "grad_norm": 1.3353781337660462,
+      "learning_rate": 0.003,
+      "loss": 4.0849,
+      "step": 4875
+    },
+    {
+      "epoch": 0.04876,
+      "grad_norm": 0.8304614123526641,
+      "learning_rate": 0.003,
+      "loss": 4.0763,
+      "step": 4876
+    },
+    {
+      "epoch": 0.04877,
+      "grad_norm": 0.7707719582887104,
+      "learning_rate": 0.003,
+      "loss": 4.0776,
+      "step": 4877
+    },
+    {
+      "epoch": 0.04878,
+      "grad_norm": 0.9308445091065936,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 4878
+    },
+    {
+      "epoch": 0.04879,
+      "grad_norm": 0.9815137055251556,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 4879
+    },
+    {
+      "epoch": 0.0488,
+      "grad_norm": 1.3666093708712401,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 4880
+    },
+    {
+      "epoch": 0.04881,
+      "grad_norm": 0.8702473732106857,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 4881
+    },
+    {
+      "epoch": 0.04882,
+      "grad_norm": 0.879457851478898,
+      "learning_rate": 0.003,
+      "loss": 4.0728,
+      "step": 4882
+    },
+    {
+      "epoch": 0.04883,
+      "grad_norm": 0.8303309342477992,
+      "learning_rate": 0.003,
+      "loss": 4.0649,
+      "step": 4883
+    },
+    {
+      "epoch": 0.04884,
+      "grad_norm": 0.8052075077532388,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 4884
+    },
+    {
+      "epoch": 0.04885,
+      "grad_norm": 0.8973915391486328,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 4885
+    },
+    {
+      "epoch": 0.04886,
+      "grad_norm": 1.0019930120717506,
+      "learning_rate": 0.003,
+      "loss": 4.078,
+      "step": 4886
+    },
+    {
+      "epoch": 0.04887,
+      "grad_norm": 1.347620612956241,
+      "learning_rate": 0.003,
+      "loss": 4.1003,
+      "step": 4887
+    },
+    {
+      "epoch": 0.04888,
+      "grad_norm": 0.8778759238009314,
+      "learning_rate": 0.003,
+      "loss": 4.0927,
+      "step": 4888
+    },
+    {
+      "epoch": 0.04889,
+      "grad_norm": 1.0906727199530957,
+      "learning_rate": 0.003,
+      "loss": 4.0783,
+      "step": 4889
+    },
+    {
+      "epoch": 0.0489,
+      "grad_norm": 0.9502197503744106,
+      "learning_rate": 0.003,
+      "loss": 4.0826,
+      "step": 4890
+    },
+    {
+      "epoch": 0.04891,
+      "grad_norm": 0.9462880674223806,
+      "learning_rate": 0.003,
+      "loss": 4.0929,
+      "step": 4891
+    },
+    {
+      "epoch": 0.04892,
+      "grad_norm": 0.9960546118887255,
+      "learning_rate": 0.003,
+      "loss": 4.0933,
+      "step": 4892
+    },
+    {
+      "epoch": 0.04893,
+      "grad_norm": 0.9133067753769846,
+      "learning_rate": 0.003,
+      "loss": 4.0906,
+      "step": 4893
+    },
+    {
+      "epoch": 0.04894,
+      "grad_norm": 0.910365622657426,
+      "learning_rate": 0.003,
+      "loss": 4.1016,
+      "step": 4894
+    },
+    {
+      "epoch": 0.04895,
+      "grad_norm": 1.0152626137013607,
+      "learning_rate": 0.003,
+      "loss": 4.0797,
+      "step": 4895
+    },
+    {
+      "epoch": 0.04896,
+      "grad_norm": 0.9625608021661926,
+      "learning_rate": 0.003,
+      "loss": 4.096,
+      "step": 4896
+    },
+    {
+      "epoch": 0.04897,
+      "grad_norm": 0.9603064678490546,
+      "learning_rate": 0.003,
+      "loss": 4.1006,
+      "step": 4897
+    },
+    {
+      "epoch": 0.04898,
+      "grad_norm": 1.0125115002322624,
+      "learning_rate": 0.003,
+      "loss": 4.0908,
+      "step": 4898
+    },
+    {
+      "epoch": 0.04899,
+      "grad_norm": 1.0091142549096461,
+      "learning_rate": 0.003,
+      "loss": 4.079,
+      "step": 4899
+    },
+    {
+      "epoch": 0.049,
+      "grad_norm": 1.1619884083457441,
+      "learning_rate": 0.003,
+      "loss": 4.1115,
+      "step": 4900
+    },
+    {
+      "epoch": 0.04901,
+      "grad_norm": 0.8775379815040303,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 4901
+    },
+    {
+      "epoch": 0.04902,
+      "grad_norm": 0.793220630533508,
+      "learning_rate": 0.003,
+      "loss": 4.095,
+      "step": 4902
+    },
+    {
+      "epoch": 0.04903,
+      "grad_norm": 0.8017930562442611,
+      "learning_rate": 0.003,
+      "loss": 4.0951,
+      "step": 4903
+    },
+    {
+      "epoch": 0.04904,
+      "grad_norm": 0.8507583142575733,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 4904
+    },
+    {
+      "epoch": 0.04905,
+      "grad_norm": 0.9366175720443591,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 4905
+    },
+    {
+      "epoch": 0.04906,
+      "grad_norm": 1.0894519512603444,
+      "learning_rate": 0.003,
+      "loss": 4.0797,
+      "step": 4906
+    },
+    {
+      "epoch": 0.04907,
+      "grad_norm": 0.9566147882681758,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 4907
+    },
+    {
+      "epoch": 0.04908,
+      "grad_norm": 0.8813724661117732,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 4908
+    },
+    {
+      "epoch": 0.04909,
+      "grad_norm": 0.7697604768117718,
+      "learning_rate": 0.003,
+      "loss": 4.0773,
+      "step": 4909
+    },
+    {
+      "epoch": 0.0491,
+      "grad_norm": 0.9551094520562223,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 4910
+    },
+    {
+      "epoch": 0.04911,
+      "grad_norm": 1.5155833386194069,
+      "learning_rate": 0.003,
+      "loss": 4.0824,
+      "step": 4911
+    },
+    {
+      "epoch": 0.04912,
+      "grad_norm": 0.9025182185743093,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 4912
+    },
+    {
+      "epoch": 0.04913,
+      "grad_norm": 0.7963791253987615,
+      "learning_rate": 0.003,
+      "loss": 4.0699,
+      "step": 4913
+    },
+    {
+      "epoch": 0.04914,
+      "grad_norm": 0.7975682531864906,
+      "learning_rate": 0.003,
+      "loss": 4.074,
+      "step": 4914
+    },
+    {
+      "epoch": 0.04915,
+      "grad_norm": 0.7680989535713021,
+      "learning_rate": 0.003,
+      "loss": 4.0798,
+      "step": 4915
+    },
+    {
+      "epoch": 0.04916,
+      "grad_norm": 0.8541651719195734,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 4916
+    },
+    {
+      "epoch": 0.04917,
+      "grad_norm": 0.9306809194401052,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 4917
+    },
+    {
+      "epoch": 0.04918,
+      "grad_norm": 1.0201811580852054,
+      "learning_rate": 0.003,
+      "loss": 4.0805,
+      "step": 4918
+    },
+    {
+      "epoch": 0.04919,
+      "grad_norm": 0.9164268845754078,
+      "learning_rate": 0.003,
+      "loss": 4.0783,
+      "step": 4919
+    },
+    {
+      "epoch": 0.0492,
+      "grad_norm": 0.8305372570501433,
+      "learning_rate": 0.003,
+      "loss": 4.0765,
+      "step": 4920
+    },
+    {
+      "epoch": 0.04921,
+      "grad_norm": 0.7707198235784101,
+      "learning_rate": 0.003,
+      "loss": 4.0926,
+      "step": 4921
+    },
+    {
+      "epoch": 0.04922,
+      "grad_norm": 0.7928692496541254,
+      "learning_rate": 0.003,
+      "loss": 4.0671,
+      "step": 4922
+    },
+    {
+      "epoch": 0.04923,
+      "grad_norm": 0.7521164440028039,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 4923
+    },
+    {
+      "epoch": 0.04924,
+      "grad_norm": 0.8188114735033863,
+      "learning_rate": 0.003,
+      "loss": 4.0991,
+      "step": 4924
+    },
+    {
+      "epoch": 0.04925,
+      "grad_norm": 0.8564277609742044,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 4925
+    },
+    {
+      "epoch": 0.04926,
+      "grad_norm": 0.8486097281410112,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 4926
+    },
+    {
+      "epoch": 0.04927,
+      "grad_norm": 0.889369882812886,
+      "learning_rate": 0.003,
+      "loss": 4.0896,
+      "step": 4927
+    },
+    {
+      "epoch": 0.04928,
+      "grad_norm": 1.040367513666876,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 4928
+    },
+    {
+      "epoch": 0.04929,
+      "grad_norm": 1.1046422380445349,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 4929
+    },
+    {
+      "epoch": 0.0493,
+      "grad_norm": 0.8664686841477672,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 4930
+    },
+    {
+      "epoch": 0.04931,
+      "grad_norm": 0.9761798460720659,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 4931
+    },
+    {
+      "epoch": 0.04932,
+      "grad_norm": 1.0969681843858505,
+      "learning_rate": 0.003,
+      "loss": 4.1055,
+      "step": 4932
+    },
+    {
+      "epoch": 0.04933,
+      "grad_norm": 0.8634620800542036,
+      "learning_rate": 0.003,
+      "loss": 4.0746,
+      "step": 4933
+    },
+    {
+      "epoch": 0.04934,
+      "grad_norm": 1.0167878960199042,
+      "learning_rate": 0.003,
+      "loss": 4.1067,
+      "step": 4934
+    },
+    {
+      "epoch": 0.04935,
+      "grad_norm": 1.2105132259455145,
+      "learning_rate": 0.003,
+      "loss": 4.1103,
+      "step": 4935
+    },
+    {
+      "epoch": 0.04936,
+      "grad_norm": 0.8823692534511746,
+      "learning_rate": 0.003,
+      "loss": 4.0749,
+      "step": 4936
+    },
+    {
+      "epoch": 0.04937,
+      "grad_norm": 1.0035117633460957,
+      "learning_rate": 0.003,
+      "loss": 4.0734,
+      "step": 4937
+    },
+    {
+      "epoch": 0.04938,
+      "grad_norm": 1.21119716146334,
+      "learning_rate": 0.003,
+      "loss": 4.0877,
+      "step": 4938
+    },
+    {
+      "epoch": 0.04939,
+      "grad_norm": 0.9542017309667814,
+      "learning_rate": 0.003,
+      "loss": 4.093,
+      "step": 4939
+    },
+    {
+      "epoch": 0.0494,
+      "grad_norm": 0.9693778311510872,
+      "learning_rate": 0.003,
+      "loss": 4.0976,
+      "step": 4940
+    },
+    {
+      "epoch": 0.04941,
+      "grad_norm": 1.1934773830149854,
+      "learning_rate": 0.003,
+      "loss": 4.0758,
+      "step": 4941
+    },
+    {
+      "epoch": 0.04942,
+      "grad_norm": 0.9941479800765747,
+      "learning_rate": 0.003,
+      "loss": 4.0833,
+      "step": 4942
+    },
+    {
+      "epoch": 0.04943,
+      "grad_norm": 1.2980963908573873,
+      "learning_rate": 0.003,
+      "loss": 4.1165,
+      "step": 4943
+    },
+    {
+      "epoch": 0.04944,
+      "grad_norm": 0.7685905125841351,
+      "learning_rate": 0.003,
+      "loss": 4.1046,
+      "step": 4944
+    },
+    {
+      "epoch": 0.04945,
+      "grad_norm": 0.77621337036946,
+      "learning_rate": 0.003,
+      "loss": 4.094,
+      "step": 4945
+    },
+    {
+      "epoch": 0.04946,
+      "grad_norm": 0.7935358182030233,
+      "learning_rate": 0.003,
+      "loss": 4.1189,
+      "step": 4946
+    },
+    {
+      "epoch": 0.04947,
+      "grad_norm": 0.95777398203718,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 4947
+    },
+    {
+      "epoch": 0.04948,
+      "grad_norm": 1.0505854626251887,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 4948
+    },
+    {
+      "epoch": 0.04949,
+      "grad_norm": 0.9843835143059065,
+      "learning_rate": 0.003,
+      "loss": 4.0889,
+      "step": 4949
+    },
+    {
+      "epoch": 0.0495,
+      "grad_norm": 0.9820238950682217,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 4950
+    },
+    {
+      "epoch": 0.04951,
+      "grad_norm": 1.0176777494089932,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 4951
+    },
+    {
+      "epoch": 0.04952,
+      "grad_norm": 0.8631784047281243,
+      "learning_rate": 0.003,
+      "loss": 4.0956,
+      "step": 4952
+    },
+    {
+      "epoch": 0.04953,
+      "grad_norm": 0.8078946550955691,
+      "learning_rate": 0.003,
+      "loss": 4.0754,
+      "step": 4953
+    },
+    {
+      "epoch": 0.04954,
+      "grad_norm": 0.8107838454898298,
+      "learning_rate": 0.003,
+      "loss": 4.0998,
+      "step": 4954
+    },
+    {
+      "epoch": 0.04955,
+      "grad_norm": 0.6872102056615723,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 4955
+    },
+    {
+      "epoch": 0.04956,
+      "grad_norm": 0.7000353354694138,
+      "learning_rate": 0.003,
+      "loss": 4.0861,
+      "step": 4956
+    },
+    {
+      "epoch": 0.04957,
+      "grad_norm": 0.768512912596234,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 4957
+    },
+    {
+      "epoch": 0.04958,
+      "grad_norm": 0.773414971171959,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 4958
+    },
+    {
+      "epoch": 0.04959,
+      "grad_norm": 0.6897656034365274,
+      "learning_rate": 0.003,
+      "loss": 4.0752,
+      "step": 4959
+    },
+    {
+      "epoch": 0.0496,
+      "grad_norm": 0.5963565715617187,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 4960
+    },
+    {
+      "epoch": 0.04961,
+      "grad_norm": 0.6834698784405567,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 4961
+    },
+    {
+      "epoch": 0.04962,
+      "grad_norm": 0.7972061518569966,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 4962
+    },
+    {
+      "epoch": 0.04963,
+      "grad_norm": 0.8471174104945988,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 4963
+    },
+    {
+      "epoch": 0.04964,
+      "grad_norm": 0.8890963190037607,
+      "learning_rate": 0.003,
+      "loss": 4.079,
+      "step": 4964
+    },
+    {
+      "epoch": 0.04965,
+      "grad_norm": 0.9621708392835049,
+      "learning_rate": 0.003,
+      "loss": 4.0829,
+      "step": 4965
+    },
+    {
+      "epoch": 0.04966,
+      "grad_norm": 1.2746050729963243,
+      "learning_rate": 0.003,
+      "loss": 4.0627,
+      "step": 4966
+    },
+    {
+      "epoch": 0.04967,
+      "grad_norm": 0.9487241879206226,
+      "learning_rate": 0.003,
+      "loss": 4.0831,
+      "step": 4967
+    },
+    {
+      "epoch": 0.04968,
+      "grad_norm": 1.0115561803434678,
+      "learning_rate": 0.003,
+      "loss": 4.1113,
+      "step": 4968
+    },
+    {
+      "epoch": 0.04969,
+      "grad_norm": 1.0743944507868857,
+      "learning_rate": 0.003,
+      "loss": 4.0867,
+      "step": 4969
+    },
+    {
+      "epoch": 0.0497,
+      "grad_norm": 0.9957497513808655,
+      "learning_rate": 0.003,
+      "loss": 4.0797,
+      "step": 4970
+    },
+    {
+      "epoch": 0.04971,
+      "grad_norm": 0.9301130535679807,
+      "learning_rate": 0.003,
+      "loss": 4.0988,
+      "step": 4971
+    },
+    {
+      "epoch": 0.04972,
+      "grad_norm": 1.0252366398011912,
+      "learning_rate": 0.003,
+      "loss": 4.0737,
+      "step": 4972
+    },
+    {
+      "epoch": 0.04973,
+      "grad_norm": 1.0321786535910968,
+      "learning_rate": 0.003,
+      "loss": 4.1218,
+      "step": 4973
+    },
+    {
+      "epoch": 0.04974,
+      "grad_norm": 1.3675268731180446,
+      "learning_rate": 0.003,
+      "loss": 4.0929,
+      "step": 4974
+    },
+    {
+      "epoch": 0.04975,
+      "grad_norm": 0.9689102407574199,
+      "learning_rate": 0.003,
+      "loss": 4.0839,
+      "step": 4975
+    },
+    {
+      "epoch": 0.04976,
+      "grad_norm": 1.034378936349661,
+      "learning_rate": 0.003,
+      "loss": 4.0842,
+      "step": 4976
+    },
+    {
+      "epoch": 0.04977,
+      "grad_norm": 1.0659592144925003,
+      "learning_rate": 0.003,
+      "loss": 4.0971,
+      "step": 4977
+    },
+    {
+      "epoch": 0.04978,
+      "grad_norm": 0.9463690418294757,
+      "learning_rate": 0.003,
+      "loss": 4.0943,
+      "step": 4978
+    },
+    {
+      "epoch": 0.04979,
+      "grad_norm": 1.0778624556709335,
+      "learning_rate": 0.003,
+      "loss": 4.1048,
+      "step": 4979
+    },
+    {
+      "epoch": 0.0498,
+      "grad_norm": 1.1912796103263936,
+      "learning_rate": 0.003,
+      "loss": 4.0978,
+      "step": 4980
+    },
+    {
+      "epoch": 0.04981,
+      "grad_norm": 0.9428519138134729,
+      "learning_rate": 0.003,
+      "loss": 4.0631,
+      "step": 4981
+    },
+    {
+      "epoch": 0.04982,
+      "grad_norm": 0.9114023013651604,
+      "learning_rate": 0.003,
+      "loss": 4.0867,
+      "step": 4982
+    },
+    {
+      "epoch": 0.04983,
+      "grad_norm": 0.843362002569619,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 4983
+    },
+    {
+      "epoch": 0.04984,
+      "grad_norm": 0.7329533030111096,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 4984
+    },
+    {
+      "epoch": 0.04985,
+      "grad_norm": 0.8771158538637082,
+      "learning_rate": 0.003,
+      "loss": 4.0766,
+      "step": 4985
+    },
+    {
+      "epoch": 0.04986,
+      "grad_norm": 0.9492314223910526,
+      "learning_rate": 0.003,
+      "loss": 4.0937,
+      "step": 4986
+    },
+    {
+      "epoch": 0.04987,
+      "grad_norm": 0.9063125309075447,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 4987
+    },
+    {
+      "epoch": 0.04988,
+      "grad_norm": 0.8211391661395855,
+      "learning_rate": 0.003,
+      "loss": 4.09,
+      "step": 4988
+    },
+    {
+      "epoch": 0.04989,
+      "grad_norm": 0.87309817323828,
+      "learning_rate": 0.003,
+      "loss": 4.0952,
+      "step": 4989
+    },
+    {
+      "epoch": 0.0499,
+      "grad_norm": 0.8139798258933807,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 4990
+    },
+    {
+      "epoch": 0.04991,
+      "grad_norm": 0.8390355952116492,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 4991
+    },
+    {
+      "epoch": 0.04992,
+      "grad_norm": 0.9935950349965598,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 4992
+    },
+    {
+      "epoch": 0.04993,
+      "grad_norm": 1.0610402805979944,
+      "learning_rate": 0.003,
+      "loss": 4.0747,
+      "step": 4993
+    },
+    {
+      "epoch": 0.04994,
+      "grad_norm": 0.94882186728126,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 4994
+    },
+    {
+      "epoch": 0.04995,
+      "grad_norm": 0.867388049906827,
+      "learning_rate": 0.003,
+      "loss": 4.0785,
+      "step": 4995
+    },
+    {
+      "epoch": 0.04996,
+      "grad_norm": 0.9650777431351639,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 4996
+    },
+    {
+      "epoch": 0.04997,
+      "grad_norm": 0.9684613017855864,
+      "learning_rate": 0.003,
+      "loss": 4.0792,
+      "step": 4997
+    },
+    {
+      "epoch": 0.04998,
+      "grad_norm": 0.9878003417576896,
+      "learning_rate": 0.003,
+      "loss": 4.1043,
+      "step": 4998
+    },
+    {
+      "epoch": 0.04999,
+      "grad_norm": 0.953046742937088,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 4999
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 0.844866115668994,
+      "learning_rate": 0.003,
+      "loss": 4.1039,
+      "step": 5000
+    },
+    {
+      "epoch": 0.05001,
+      "grad_norm": 0.8155144329490257,
+      "learning_rate": 0.003,
+      "loss": 4.098,
+      "step": 5001
+    },
+    {
+      "epoch": 0.05002,
+      "grad_norm": 0.9071734968733114,
+      "learning_rate": 0.003,
+      "loss": 4.0731,
+      "step": 5002
+    },
+    {
+      "epoch": 0.05003,
+      "grad_norm": 1.0523308479446838,
+      "learning_rate": 0.003,
+      "loss": 4.092,
+      "step": 5003
+    },
+    {
+      "epoch": 0.05004,
+      "grad_norm": 1.0673372138869082,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 5004
+    },
+    {
+      "epoch": 0.05005,
+      "grad_norm": 1.0190354983748227,
+      "learning_rate": 0.003,
+      "loss": 4.0976,
+      "step": 5005
+    },
+    {
+      "epoch": 0.05006,
+      "grad_norm": 1.0502501422500765,
+      "learning_rate": 0.003,
+      "loss": 4.0762,
+      "step": 5006
+    },
+    {
+      "epoch": 0.05007,
+      "grad_norm": 1.0050657203422344,
+      "learning_rate": 0.003,
+      "loss": 4.0922,
+      "step": 5007
+    },
+    {
+      "epoch": 0.05008,
+      "grad_norm": 1.0632875671321045,
+      "learning_rate": 0.003,
+      "loss": 4.063,
+      "step": 5008
+    },
+    {
+      "epoch": 0.05009,
+      "grad_norm": 0.7483220321269478,
+      "learning_rate": 0.003,
+      "loss": 4.0633,
+      "step": 5009
+    },
+    {
+      "epoch": 0.0501,
+      "grad_norm": 0.6604154006789746,
+      "learning_rate": 0.003,
+      "loss": 4.076,
+      "step": 5010
+    },
+    {
+      "epoch": 0.05011,
+      "grad_norm": 0.7127635769855144,
+      "learning_rate": 0.003,
+      "loss": 4.0771,
+      "step": 5011
+    },
+    {
+      "epoch": 0.05012,
+      "grad_norm": 0.8658367408694297,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 5012
+    },
+    {
+      "epoch": 0.05013,
+      "grad_norm": 1.0526587712962345,
+      "learning_rate": 0.003,
+      "loss": 4.0761,
+      "step": 5013
+    },
+    {
+      "epoch": 0.05014,
+      "grad_norm": 1.2523503854735638,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 5014
+    },
+    {
+      "epoch": 0.05015,
+      "grad_norm": 0.8154569818223713,
+      "learning_rate": 0.003,
+      "loss": 4.0894,
+      "step": 5015
+    },
+    {
+      "epoch": 0.05016,
+      "grad_norm": 0.8907021793572791,
+      "learning_rate": 0.003,
+      "loss": 4.0694,
+      "step": 5016
+    },
+    {
+      "epoch": 0.05017,
+      "grad_norm": 0.9052920115396309,
+      "learning_rate": 0.003,
+      "loss": 4.0847,
+      "step": 5017
+    },
+    {
+      "epoch": 0.05018,
+      "grad_norm": 0.8754711897047392,
+      "learning_rate": 0.003,
+      "loss": 4.0788,
+      "step": 5018
+    },
+    {
+      "epoch": 0.05019,
+      "grad_norm": 0.8774784593403933,
+      "learning_rate": 0.003,
+      "loss": 4.0979,
+      "step": 5019
+    },
+    {
+      "epoch": 0.0502,
+      "grad_norm": 0.9389866910530438,
+      "learning_rate": 0.003,
+      "loss": 4.0692,
+      "step": 5020
+    },
+    {
+      "epoch": 0.05021,
+      "grad_norm": 0.9591253397524557,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 5021
+    },
+    {
+      "epoch": 0.05022,
+      "grad_norm": 1.1105484396556573,
+      "learning_rate": 0.003,
+      "loss": 4.0783,
+      "step": 5022
+    },
+    {
+      "epoch": 0.05023,
+      "grad_norm": 1.3065590449165418,
+      "learning_rate": 0.003,
+      "loss": 4.1089,
+      "step": 5023
+    },
+    {
+      "epoch": 0.05024,
+      "grad_norm": 0.9093611617258934,
+      "learning_rate": 0.003,
+      "loss": 4.0612,
+      "step": 5024
+    },
+    {
+      "epoch": 0.05025,
+      "grad_norm": 0.8149464138741495,
+      "learning_rate": 0.003,
+      "loss": 4.0977,
+      "step": 5025
+    },
+    {
+      "epoch": 0.05026,
+      "grad_norm": 0.8063387216832264,
+      "learning_rate": 0.003,
+      "loss": 4.0701,
+      "step": 5026
+    },
+    {
+      "epoch": 0.05027,
+      "grad_norm": 0.9928443146318517,
+      "learning_rate": 0.003,
+      "loss": 4.0895,
+      "step": 5027
+    },
+    {
+      "epoch": 0.05028,
+      "grad_norm": 1.1022004638491405,
+      "learning_rate": 0.003,
+      "loss": 4.0788,
+      "step": 5028
+    },
+    {
+      "epoch": 0.05029,
+      "grad_norm": 1.0254112827493802,
+      "learning_rate": 0.003,
+      "loss": 4.0926,
+      "step": 5029
+    },
+    {
+      "epoch": 0.0503,
+      "grad_norm": 0.9739559101423857,
+      "learning_rate": 0.003,
+      "loss": 4.0655,
+      "step": 5030
+    },
+    {
+      "epoch": 0.05031,
+      "grad_norm": 1.195596806455749,
+      "learning_rate": 0.003,
+      "loss": 4.0903,
+      "step": 5031
+    },
+    {
+      "epoch": 0.05032,
+      "grad_norm": 0.8132570593113878,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 5032
+    },
+    {
+      "epoch": 0.05033,
+      "grad_norm": 0.7335990765850869,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 5033
+    },
+    {
+      "epoch": 0.05034,
+      "grad_norm": 0.6554999770414173,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 5034
+    },
+    {
+      "epoch": 0.05035,
+      "grad_norm": 0.7532736427609922,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 5035
+    },
+    {
+      "epoch": 0.05036,
+      "grad_norm": 0.8283547847263479,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 5036
+    },
+    {
+      "epoch": 0.05037,
+      "grad_norm": 0.9606336416914282,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 5037
+    },
+    {
+      "epoch": 0.05038,
+      "grad_norm": 1.1756105988578247,
+      "learning_rate": 0.003,
+      "loss": 4.0784,
+      "step": 5038
+    },
+    {
+      "epoch": 0.05039,
+      "grad_norm": 0.7260800281879718,
+      "learning_rate": 0.003,
+      "loss": 4.0816,
+      "step": 5039
+    },
+    {
+      "epoch": 0.0504,
+      "grad_norm": 0.6895678700118688,
+      "learning_rate": 0.003,
+      "loss": 4.0769,
+      "step": 5040
+    },
+    {
+      "epoch": 0.05041,
+      "grad_norm": 0.9054512238934349,
+      "learning_rate": 0.003,
+      "loss": 4.0834,
+      "step": 5041
+    },
+    {
+      "epoch": 0.05042,
+      "grad_norm": 1.0681246931889021,
+      "learning_rate": 0.003,
+      "loss": 4.0798,
+      "step": 5042
+    },
+    {
+      "epoch": 0.05043,
+      "grad_norm": 1.0081790953059804,
+      "learning_rate": 0.003,
+      "loss": 4.0765,
+      "step": 5043
+    },
+    {
+      "epoch": 0.05044,
+      "grad_norm": 1.0083278627521677,
+      "learning_rate": 0.003,
+      "loss": 4.095,
+      "step": 5044
+    },
+    {
+      "epoch": 0.05045,
+      "grad_norm": 1.1323410375959377,
+      "learning_rate": 0.003,
+      "loss": 4.1136,
+      "step": 5045
+    },
+    {
+      "epoch": 0.05046,
+      "grad_norm": 1.038046747857265,
+      "learning_rate": 0.003,
+      "loss": 4.0726,
+      "step": 5046
+    },
+    {
+      "epoch": 0.05047,
+      "grad_norm": 0.8846272273108629,
+      "learning_rate": 0.003,
+      "loss": 4.0807,
+      "step": 5047
+    },
+    {
+      "epoch": 0.05048,
+      "grad_norm": 1.039375733492065,
+      "learning_rate": 0.003,
+      "loss": 4.0824,
+      "step": 5048
+    },
+    {
+      "epoch": 0.05049,
+      "grad_norm": 1.0198096987245278,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 5049
+    },
+    {
+      "epoch": 0.0505,
+      "grad_norm": 0.9914651248213096,
+      "learning_rate": 0.003,
+      "loss": 4.1331,
+      "step": 5050
+    },
+    {
+      "epoch": 0.05051,
+      "grad_norm": 0.9522133302767952,
+      "learning_rate": 0.003,
+      "loss": 4.0903,
+      "step": 5051
+    },
+    {
+      "epoch": 0.05052,
+      "grad_norm": 1.020178995716012,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 5052
+    },
+    {
+      "epoch": 0.05053,
+      "grad_norm": 0.9136513702774933,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 5053
+    },
+    {
+      "epoch": 0.05054,
+      "grad_norm": 0.8516625497650816,
+      "learning_rate": 0.003,
+      "loss": 4.0932,
+      "step": 5054
+    },
+    {
+      "epoch": 0.05055,
+      "grad_norm": 0.8444636192801542,
+      "learning_rate": 0.003,
+      "loss": 4.0714,
+      "step": 5055
+    },
+    {
+      "epoch": 0.05056,
+      "grad_norm": 0.8646124548092219,
+      "learning_rate": 0.003,
+      "loss": 4.0859,
+      "step": 5056
+    },
+    {
+      "epoch": 0.05057,
+      "grad_norm": 0.954021030827355,
+      "learning_rate": 0.003,
+      "loss": 4.0913,
+      "step": 5057
+    },
+    {
+      "epoch": 0.05058,
+      "grad_norm": 1.1354179546437237,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 5058
+    },
+    {
+      "epoch": 0.05059,
+      "grad_norm": 0.8412247009608202,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 5059
+    },
+    {
+      "epoch": 0.0506,
+      "grad_norm": 0.8258030312088399,
+      "learning_rate": 0.003,
+      "loss": 4.089,
+      "step": 5060
+    },
+    {
+      "epoch": 0.05061,
+      "grad_norm": 0.8796678252375678,
+      "learning_rate": 0.003,
+      "loss": 4.0694,
+      "step": 5061
+    },
+    {
+      "epoch": 0.05062,
+      "grad_norm": 0.9882911088154981,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 5062
+    },
+    {
+      "epoch": 0.05063,
+      "grad_norm": 0.9432186009346578,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 5063
+    },
+    {
+      "epoch": 0.05064,
+      "grad_norm": 0.9485814708559243,
+      "learning_rate": 0.003,
+      "loss": 4.0823,
+      "step": 5064
+    },
+    {
+      "epoch": 0.05065,
+      "grad_norm": 0.9973027340770267,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 5065
+    },
+    {
+      "epoch": 0.05066,
+      "grad_norm": 1.055670001942788,
+      "learning_rate": 0.003,
+      "loss": 4.0612,
+      "step": 5066
+    },
+    {
+      "epoch": 0.05067,
+      "grad_norm": 0.9244772620512077,
+      "learning_rate": 0.003,
+      "loss": 4.0698,
+      "step": 5067
+    },
+    {
+      "epoch": 0.05068,
+      "grad_norm": 0.958855347779479,
+      "learning_rate": 0.003,
+      "loss": 4.093,
+      "step": 5068
+    },
+    {
+      "epoch": 0.05069,
+      "grad_norm": 0.9074306933330328,
+      "learning_rate": 0.003,
+      "loss": 4.0826,
+      "step": 5069
+    },
+    {
+      "epoch": 0.0507,
+      "grad_norm": 0.7465736760883044,
+      "learning_rate": 0.003,
+      "loss": 4.0915,
+      "step": 5070
+    },
+    {
+      "epoch": 0.05071,
+      "grad_norm": 0.7907220925527945,
+      "learning_rate": 0.003,
+      "loss": 4.0797,
+      "step": 5071
+    },
+    {
+      "epoch": 0.05072,
+      "grad_norm": 0.8791832214881413,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 5072
+    },
+    {
+      "epoch": 0.05073,
+      "grad_norm": 1.1193497551932123,
+      "learning_rate": 0.003,
+      "loss": 4.0625,
+      "step": 5073
+    },
+    {
+      "epoch": 0.05074,
+      "grad_norm": 1.007997199633059,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 5074
+    },
+    {
+      "epoch": 0.05075,
+      "grad_norm": 0.9210544932505602,
+      "learning_rate": 0.003,
+      "loss": 4.0754,
+      "step": 5075
+    },
+    {
+      "epoch": 0.05076,
+      "grad_norm": 0.8971244157459003,
+      "learning_rate": 0.003,
+      "loss": 4.09,
+      "step": 5076
+    },
+    {
+      "epoch": 0.05077,
+      "grad_norm": 0.8845891734232929,
+      "learning_rate": 0.003,
+      "loss": 4.0928,
+      "step": 5077
+    },
+    {
+      "epoch": 0.05078,
+      "grad_norm": 0.9113721083676786,
+      "learning_rate": 0.003,
+      "loss": 4.0864,
+      "step": 5078
+    },
+    {
+      "epoch": 0.05079,
+      "grad_norm": 0.9084231790144179,
+      "learning_rate": 0.003,
+      "loss": 4.0586,
+      "step": 5079
+    },
+    {
+      "epoch": 0.0508,
+      "grad_norm": 1.0477669246554797,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 5080
+    },
+    {
+      "epoch": 0.05081,
+      "grad_norm": 1.0274679159676723,
+      "learning_rate": 0.003,
+      "loss": 4.0841,
+      "step": 5081
+    },
+    {
+      "epoch": 0.05082,
+      "grad_norm": 0.9811435796751755,
+      "learning_rate": 0.003,
+      "loss": 4.0879,
+      "step": 5082
+    },
+    {
+      "epoch": 0.05083,
+      "grad_norm": 0.9115900027432353,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 5083
+    },
+    {
+      "epoch": 0.05084,
+      "grad_norm": 0.9665650959530513,
+      "learning_rate": 0.003,
+      "loss": 4.0697,
+      "step": 5084
+    },
+    {
+      "epoch": 0.05085,
+      "grad_norm": 1.094785200544881,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 5085
+    },
+    {
+      "epoch": 0.05086,
+      "grad_norm": 1.0395235168756072,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 5086
+    },
+    {
+      "epoch": 0.05087,
+      "grad_norm": 1.1914983032220043,
+      "learning_rate": 0.003,
+      "loss": 4.0776,
+      "step": 5087
+    },
+    {
+      "epoch": 0.05088,
+      "grad_norm": 0.9164887728560919,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 5088
+    },
+    {
+      "epoch": 0.05089,
+      "grad_norm": 0.9984418259866299,
+      "learning_rate": 0.003,
+      "loss": 4.0696,
+      "step": 5089
+    },
+    {
+      "epoch": 0.0509,
+      "grad_norm": 0.9643661674080093,
+      "learning_rate": 0.003,
+      "loss": 4.1039,
+      "step": 5090
+    },
+    {
+      "epoch": 0.05091,
+      "grad_norm": 0.9837794177673876,
+      "learning_rate": 0.003,
+      "loss": 4.067,
+      "step": 5091
+    },
+    {
+      "epoch": 0.05092,
+      "grad_norm": 0.8924921438341328,
+      "learning_rate": 0.003,
+      "loss": 4.0663,
+      "step": 5092
+    },
+    {
+      "epoch": 0.05093,
+      "grad_norm": 0.8256552974904879,
+      "learning_rate": 0.003,
+      "loss": 4.0814,
+      "step": 5093
+    },
+    {
+      "epoch": 0.05094,
+      "grad_norm": 0.9039163713516233,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 5094
+    },
+    {
+      "epoch": 0.05095,
+      "grad_norm": 1.2383238891133954,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 5095
+    },
+    {
+      "epoch": 0.05096,
+      "grad_norm": 0.9650892595888488,
+      "learning_rate": 0.003,
+      "loss": 4.074,
+      "step": 5096
+    },
+    {
+      "epoch": 0.05097,
+      "grad_norm": 0.9342147612808765,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 5097
+    },
+    {
+      "epoch": 0.05098,
+      "grad_norm": 0.9150581045088569,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 5098
+    },
+    {
+      "epoch": 0.05099,
+      "grad_norm": 1.0196257914961222,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 5099
+    },
+    {
+      "epoch": 0.051,
+      "grad_norm": 1.0100480797466045,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 5100
+    },
+    {
+      "epoch": 0.05101,
+      "grad_norm": 1.076613085076168,
+      "learning_rate": 0.003,
+      "loss": 4.1062,
+      "step": 5101
+    },
+    {
+      "epoch": 0.05102,
+      "grad_norm": 0.9177055767593738,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 5102
+    },
+    {
+      "epoch": 0.05103,
+      "grad_norm": 0.7867724366196349,
+      "learning_rate": 0.003,
+      "loss": 4.0841,
+      "step": 5103
+    },
+    {
+      "epoch": 0.05104,
+      "grad_norm": 0.8457986934357559,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 5104
+    },
+    {
+      "epoch": 0.05105,
+      "grad_norm": 0.8367744837015725,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 5105
+    },
+    {
+      "epoch": 0.05106,
+      "grad_norm": 0.8022124167980758,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 5106
+    },
+    {
+      "epoch": 0.05107,
+      "grad_norm": 0.6952811544982499,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 5107
+    },
+    {
+      "epoch": 0.05108,
+      "grad_norm": 0.7444855516396648,
+      "learning_rate": 0.003,
+      "loss": 4.1015,
+      "step": 5108
+    },
+    {
+      "epoch": 0.05109,
+      "grad_norm": 0.878589896084288,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 5109
+    },
+    {
+      "epoch": 0.0511,
+      "grad_norm": 0.8745594675885457,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 5110
+    },
+    {
+      "epoch": 0.05111,
+      "grad_norm": 1.0139622357551754,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 5111
+    },
+    {
+      "epoch": 0.05112,
+      "grad_norm": 1.248129365292357,
+      "learning_rate": 0.003,
+      "loss": 4.0919,
+      "step": 5112
+    },
+    {
+      "epoch": 0.05113,
+      "grad_norm": 0.9885210087032184,
+      "learning_rate": 0.003,
+      "loss": 4.0743,
+      "step": 5113
+    },
+    {
+      "epoch": 0.05114,
+      "grad_norm": 0.9571971369820982,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 5114
+    },
+    {
+      "epoch": 0.05115,
+      "grad_norm": 0.9106967184247701,
+      "learning_rate": 0.003,
+      "loss": 4.0784,
+      "step": 5115
+    },
+    {
+      "epoch": 0.05116,
+      "grad_norm": 1.053212984649482,
+      "learning_rate": 0.003,
+      "loss": 4.0904,
+      "step": 5116
+    },
+    {
+      "epoch": 0.05117,
+      "grad_norm": 0.9762026130378781,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 5117
+    },
+    {
+      "epoch": 0.05118,
+      "grad_norm": 0.9813459473673732,
+      "learning_rate": 0.003,
+      "loss": 4.0675,
+      "step": 5118
+    },
+    {
+      "epoch": 0.05119,
+      "grad_norm": 1.0553578473596843,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 5119
+    },
+    {
+      "epoch": 0.0512,
+      "grad_norm": 1.070915563107028,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 5120
+    },
+    {
+      "epoch": 0.05121,
+      "grad_norm": 0.8748371385007153,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 5121
+    },
+    {
+      "epoch": 0.05122,
+      "grad_norm": 0.960144764905716,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 5122
+    },
+    {
+      "epoch": 0.05123,
+      "grad_norm": 1.210628825057938,
+      "learning_rate": 0.003,
+      "loss": 4.0783,
+      "step": 5123
+    },
+    {
+      "epoch": 0.05124,
+      "grad_norm": 0.9135871799109425,
+      "learning_rate": 0.003,
+      "loss": 4.1068,
+      "step": 5124
+    },
+    {
+      "epoch": 0.05125,
+      "grad_norm": 0.9676901109206436,
+      "learning_rate": 0.003,
+      "loss": 4.0741,
+      "step": 5125
+    },
+    {
+      "epoch": 0.05126,
+      "grad_norm": 0.8403551330424058,
+      "learning_rate": 0.003,
+      "loss": 4.0649,
+      "step": 5126
+    },
+    {
+      "epoch": 0.05127,
+      "grad_norm": 0.7466734661308947,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 5127
+    },
+    {
+      "epoch": 0.05128,
+      "grad_norm": 0.728346628542258,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 5128
+    },
+    {
+      "epoch": 0.05129,
+      "grad_norm": 0.7017928229749225,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 5129
+    },
+    {
+      "epoch": 0.0513,
+      "grad_norm": 0.7221803684673601,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 5130
+    },
+    {
+      "epoch": 0.05131,
+      "grad_norm": 0.7808750308328987,
+      "learning_rate": 0.003,
+      "loss": 4.0746,
+      "step": 5131
+    },
+    {
+      "epoch": 0.05132,
+      "grad_norm": 0.8913684947688636,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 5132
+    },
+    {
+      "epoch": 0.05133,
+      "grad_norm": 1.1454285750946989,
+      "learning_rate": 0.003,
+      "loss": 4.0771,
+      "step": 5133
+    },
+    {
+      "epoch": 0.05134,
+      "grad_norm": 0.9684078269337082,
+      "learning_rate": 0.003,
+      "loss": 4.0834,
+      "step": 5134
+    },
+    {
+      "epoch": 0.05135,
+      "grad_norm": 1.1615129798672017,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 5135
+    },
+    {
+      "epoch": 0.05136,
+      "grad_norm": 0.8816470981524503,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 5136
+    },
+    {
+      "epoch": 0.05137,
+      "grad_norm": 0.8808116644656396,
+      "learning_rate": 0.003,
+      "loss": 4.0813,
+      "step": 5137
+    },
+    {
+      "epoch": 0.05138,
+      "grad_norm": 1.030677698421762,
+      "learning_rate": 0.003,
+      "loss": 4.0754,
+      "step": 5138
+    },
+    {
+      "epoch": 0.05139,
+      "grad_norm": 1.1279946491267732,
+      "learning_rate": 0.003,
+      "loss": 4.0747,
+      "step": 5139
+    },
+    {
+      "epoch": 0.0514,
+      "grad_norm": 0.9370222106532408,
+      "learning_rate": 0.003,
+      "loss": 4.0676,
+      "step": 5140
+    },
+    {
+      "epoch": 0.05141,
+      "grad_norm": 0.8884938807254016,
+      "learning_rate": 0.003,
+      "loss": 4.1182,
+      "step": 5141
+    },
+    {
+      "epoch": 0.05142,
+      "grad_norm": 0.840850706102145,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 5142
+    },
+    {
+      "epoch": 0.05143,
+      "grad_norm": 0.880595971401337,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 5143
+    },
+    {
+      "epoch": 0.05144,
+      "grad_norm": 0.8704477269765287,
+      "learning_rate": 0.003,
+      "loss": 4.0567,
+      "step": 5144
+    },
+    {
+      "epoch": 0.05145,
+      "grad_norm": 0.8916851706228778,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 5145
+    },
+    {
+      "epoch": 0.05146,
+      "grad_norm": 0.9663208313115196,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 5146
+    },
+    {
+      "epoch": 0.05147,
+      "grad_norm": 1.1070241756865764,
+      "learning_rate": 0.003,
+      "loss": 4.0845,
+      "step": 5147
+    },
+    {
+      "epoch": 0.05148,
+      "grad_norm": 0.9885155736285973,
+      "learning_rate": 0.003,
+      "loss": 4.0813,
+      "step": 5148
+    },
+    {
+      "epoch": 0.05149,
+      "grad_norm": 0.9670853678419679,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 5149
+    },
+    {
+      "epoch": 0.0515,
+      "grad_norm": 0.9350123569517975,
+      "learning_rate": 0.003,
+      "loss": 4.0934,
+      "step": 5150
+    },
+    {
+      "epoch": 0.05151,
+      "grad_norm": 0.9079989062981128,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 5151
+    },
+    {
+      "epoch": 0.05152,
+      "grad_norm": 0.9776926173785698,
+      "learning_rate": 0.003,
+      "loss": 4.0937,
+      "step": 5152
+    },
+    {
+      "epoch": 0.05153,
+      "grad_norm": 0.9166249719052514,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 5153
+    },
+    {
+      "epoch": 0.05154,
+      "grad_norm": 0.8423591519799284,
+      "learning_rate": 0.003,
+      "loss": 4.0811,
+      "step": 5154
+    },
+    {
+      "epoch": 0.05155,
+      "grad_norm": 0.9495784465505834,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 5155
+    },
+    {
+      "epoch": 0.05156,
+      "grad_norm": 0.8711852658243976,
+      "learning_rate": 0.003,
+      "loss": 4.1039,
+      "step": 5156
+    },
+    {
+      "epoch": 0.05157,
+      "grad_norm": 0.9761077463552009,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 5157
+    },
+    {
+      "epoch": 0.05158,
+      "grad_norm": 0.9931352807114164,
+      "learning_rate": 0.003,
+      "loss": 4.0791,
+      "step": 5158
+    },
+    {
+      "epoch": 0.05159,
+      "grad_norm": 1.1193695002600963,
+      "learning_rate": 0.003,
+      "loss": 4.0619,
+      "step": 5159
+    },
+    {
+      "epoch": 0.0516,
+      "grad_norm": 0.9267022082064607,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 5160
+    },
+    {
+      "epoch": 0.05161,
+      "grad_norm": 0.9665280786599717,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 5161
+    },
+    {
+      "epoch": 0.05162,
+      "grad_norm": 1.0869042755370812,
+      "learning_rate": 0.003,
+      "loss": 4.0735,
+      "step": 5162
+    },
+    {
+      "epoch": 0.05163,
+      "grad_norm": 1.0568255095175747,
+      "learning_rate": 0.003,
+      "loss": 4.0885,
+      "step": 5163
+    },
+    {
+      "epoch": 0.05164,
+      "grad_norm": 1.1739265193923354,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 5164
+    },
+    {
+      "epoch": 0.05165,
+      "grad_norm": 0.9577726183728671,
+      "learning_rate": 0.003,
+      "loss": 4.0856,
+      "step": 5165
+    },
+    {
+      "epoch": 0.05166,
+      "grad_norm": 1.1352998589608443,
+      "learning_rate": 0.003,
+      "loss": 4.084,
+      "step": 5166
+    },
+    {
+      "epoch": 0.05167,
+      "grad_norm": 0.8717433391514583,
+      "learning_rate": 0.003,
+      "loss": 4.0888,
+      "step": 5167
+    },
+    {
+      "epoch": 0.05168,
+      "grad_norm": 0.870068697924481,
+      "learning_rate": 0.003,
+      "loss": 4.115,
+      "step": 5168
+    },
+    {
+      "epoch": 0.05169,
+      "grad_norm": 1.0023358246624934,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 5169
+    },
+    {
+      "epoch": 0.0517,
+      "grad_norm": 1.2431734452049883,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 5170
+    },
+    {
+      "epoch": 0.05171,
+      "grad_norm": 0.9116624433354724,
+      "learning_rate": 0.003,
+      "loss": 4.068,
+      "step": 5171
+    },
+    {
+      "epoch": 0.05172,
+      "grad_norm": 0.9323927967551716,
+      "learning_rate": 0.003,
+      "loss": 4.0832,
+      "step": 5172
+    },
+    {
+      "epoch": 0.05173,
+      "grad_norm": 1.054805595580774,
+      "learning_rate": 0.003,
+      "loss": 4.0957,
+      "step": 5173
+    },
+    {
+      "epoch": 0.05174,
+      "grad_norm": 1.122856751648051,
+      "learning_rate": 0.003,
+      "loss": 4.0906,
+      "step": 5174
+    },
+    {
+      "epoch": 0.05175,
+      "grad_norm": 0.9937471915910451,
+      "learning_rate": 0.003,
+      "loss": 4.0927,
+      "step": 5175
+    },
+    {
+      "epoch": 0.05176,
+      "grad_norm": 0.9840832744172103,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 5176
+    },
+    {
+      "epoch": 0.05177,
+      "grad_norm": 0.7800980213457716,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 5177
+    },
+    {
+      "epoch": 0.05178,
+      "grad_norm": 0.8673592583630511,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 5178
+    },
+    {
+      "epoch": 0.05179,
+      "grad_norm": 0.8980602578393677,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 5179
+    },
+    {
+      "epoch": 0.0518,
+      "grad_norm": 1.0782172804196768,
+      "learning_rate": 0.003,
+      "loss": 4.0708,
+      "step": 5180
+    },
+    {
+      "epoch": 0.05181,
+      "grad_norm": 1.3393343782861993,
+      "learning_rate": 0.003,
+      "loss": 4.0904,
+      "step": 5181
+    },
+    {
+      "epoch": 0.05182,
+      "grad_norm": 0.6874879870433082,
+      "learning_rate": 0.003,
+      "loss": 4.0907,
+      "step": 5182
+    },
+    {
+      "epoch": 0.05183,
+      "grad_norm": 0.8933939115278953,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 5183
+    },
+    {
+      "epoch": 0.05184,
+      "grad_norm": 1.0375617190613013,
+      "learning_rate": 0.003,
+      "loss": 4.0642,
+      "step": 5184
+    },
+    {
+      "epoch": 0.05185,
+      "grad_norm": 0.978363863806633,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 5185
+    },
+    {
+      "epoch": 0.05186,
+      "grad_norm": 0.9787815400755827,
+      "learning_rate": 0.003,
+      "loss": 4.0721,
+      "step": 5186
+    },
+    {
+      "epoch": 0.05187,
+      "grad_norm": 1.026722875042982,
+      "learning_rate": 0.003,
+      "loss": 4.1133,
+      "step": 5187
+    },
+    {
+      "epoch": 0.05188,
+      "grad_norm": 1.0387976995995625,
+      "learning_rate": 0.003,
+      "loss": 4.0896,
+      "step": 5188
+    },
+    {
+      "epoch": 0.05189,
+      "grad_norm": 0.9737693143733961,
+      "learning_rate": 0.003,
+      "loss": 4.0633,
+      "step": 5189
+    },
+    {
+      "epoch": 0.0519,
+      "grad_norm": 1.1745663438513876,
+      "learning_rate": 0.003,
+      "loss": 4.0705,
+      "step": 5190
+    },
+    {
+      "epoch": 0.05191,
+      "grad_norm": 0.9594735851770623,
+      "learning_rate": 0.003,
+      "loss": 4.0673,
+      "step": 5191
+    },
+    {
+      "epoch": 0.05192,
+      "grad_norm": 0.9706881201053492,
+      "learning_rate": 0.003,
+      "loss": 4.0869,
+      "step": 5192
+    },
+    {
+      "epoch": 0.05193,
+      "grad_norm": 0.927921231735236,
+      "learning_rate": 0.003,
+      "loss": 4.0815,
+      "step": 5193
+    },
+    {
+      "epoch": 0.05194,
+      "grad_norm": 0.8511889522904593,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 5194
+    },
+    {
+      "epoch": 0.05195,
+      "grad_norm": 0.8688574305845654,
+      "learning_rate": 0.003,
+      "loss": 4.0833,
+      "step": 5195
+    },
+    {
+      "epoch": 0.05196,
+      "grad_norm": 1.0602526179506908,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 5196
+    },
+    {
+      "epoch": 0.05197,
+      "grad_norm": 1.004033518921005,
+      "learning_rate": 0.003,
+      "loss": 4.0874,
+      "step": 5197
+    },
+    {
+      "epoch": 0.05198,
+      "grad_norm": 0.9382654504233616,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 5198
+    },
+    {
+      "epoch": 0.05199,
+      "grad_norm": 0.8240832698733139,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 5199
+    },
+    {
+      "epoch": 0.052,
+      "grad_norm": 0.8010190091673817,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 5200
+    },
+    {
+      "epoch": 0.05201,
+      "grad_norm": 0.9345369763160736,
+      "learning_rate": 0.003,
+      "loss": 4.079,
+      "step": 5201
+    },
+    {
+      "epoch": 0.05202,
+      "grad_norm": 1.1219968562809288,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 5202
+    },
+    {
+      "epoch": 0.05203,
+      "grad_norm": 1.165320649672993,
+      "learning_rate": 0.003,
+      "loss": 4.1032,
+      "step": 5203
+    },
+    {
+      "epoch": 0.05204,
+      "grad_norm": 1.0069940016760848,
+      "learning_rate": 0.003,
+      "loss": 4.0912,
+      "step": 5204
+    },
+    {
+      "epoch": 0.05205,
+      "grad_norm": 0.9073269196171544,
+      "learning_rate": 0.003,
+      "loss": 4.0947,
+      "step": 5205
+    },
+    {
+      "epoch": 0.05206,
+      "grad_norm": 0.9676783509672855,
+      "learning_rate": 0.003,
+      "loss": 4.0719,
+      "step": 5206
+    },
+    {
+      "epoch": 0.05207,
+      "grad_norm": 0.9614251201683252,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 5207
+    },
+    {
+      "epoch": 0.05208,
+      "grad_norm": 0.8233264549032968,
+      "learning_rate": 0.003,
+      "loss": 4.0703,
+      "step": 5208
+    },
+    {
+      "epoch": 0.05209,
+      "grad_norm": 0.795950781260021,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 5209
+    },
+    {
+      "epoch": 0.0521,
+      "grad_norm": 0.850100905132024,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 5210
+    },
+    {
+      "epoch": 0.05211,
+      "grad_norm": 1.0102855739696595,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 5211
+    },
+    {
+      "epoch": 0.05212,
+      "grad_norm": 1.3483238868956038,
+      "learning_rate": 0.003,
+      "loss": 4.1233,
+      "step": 5212
+    },
+    {
+      "epoch": 0.05213,
+      "grad_norm": 0.771838626249321,
+      "learning_rate": 0.003,
+      "loss": 4.0664,
+      "step": 5213
+    },
+    {
+      "epoch": 0.05214,
+      "grad_norm": 0.6993971591998677,
+      "learning_rate": 0.003,
+      "loss": 4.0572,
+      "step": 5214
+    },
+    {
+      "epoch": 0.05215,
+      "grad_norm": 0.7581728248150723,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 5215
+    },
+    {
+      "epoch": 0.05216,
+      "grad_norm": 0.8519198915236995,
+      "learning_rate": 0.003,
+      "loss": 4.0619,
+      "step": 5216
+    },
+    {
+      "epoch": 0.05217,
+      "grad_norm": 1.0745388491695431,
+      "learning_rate": 0.003,
+      "loss": 4.0712,
+      "step": 5217
+    },
+    {
+      "epoch": 0.05218,
+      "grad_norm": 0.9935688948883226,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 5218
+    },
+    {
+      "epoch": 0.05219,
+      "grad_norm": 0.9084561291587792,
+      "learning_rate": 0.003,
+      "loss": 4.0856,
+      "step": 5219
+    },
+    {
+      "epoch": 0.0522,
+      "grad_norm": 1.0337978058374222,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 5220
+    },
+    {
+      "epoch": 0.05221,
+      "grad_norm": 1.1422321793715118,
+      "learning_rate": 0.003,
+      "loss": 4.0672,
+      "step": 5221
+    },
+    {
+      "epoch": 0.05222,
+      "grad_norm": 0.7554460463065314,
+      "learning_rate": 0.003,
+      "loss": 4.069,
+      "step": 5222
+    },
+    {
+      "epoch": 0.05223,
+      "grad_norm": 0.8000343955599867,
+      "learning_rate": 0.003,
+      "loss": 4.0844,
+      "step": 5223
+    },
+    {
+      "epoch": 0.05224,
+      "grad_norm": 0.9379433719212857,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 5224
+    },
+    {
+      "epoch": 0.05225,
+      "grad_norm": 1.2417850745548258,
+      "learning_rate": 0.003,
+      "loss": 4.1011,
+      "step": 5225
+    },
+    {
+      "epoch": 0.05226,
+      "grad_norm": 0.9382790639135026,
+      "learning_rate": 0.003,
+      "loss": 4.0804,
+      "step": 5226
+    },
+    {
+      "epoch": 0.05227,
+      "grad_norm": 0.8974481342791866,
+      "learning_rate": 0.003,
+      "loss": 4.1067,
+      "step": 5227
+    },
+    {
+      "epoch": 0.05228,
+      "grad_norm": 0.8629619700937163,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 5228
+    },
+    {
+      "epoch": 0.05229,
+      "grad_norm": 1.019751882064597,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 5229
+    },
+    {
+      "epoch": 0.0523,
+      "grad_norm": 1.1377445229403527,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 5230
+    },
+    {
+      "epoch": 0.05231,
+      "grad_norm": 0.9424656899524195,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 5231
+    },
+    {
+      "epoch": 0.05232,
+      "grad_norm": 0.9575611751186932,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 5232
+    },
+    {
+      "epoch": 0.05233,
+      "grad_norm": 0.9293995877028672,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 5233
+    },
+    {
+      "epoch": 0.05234,
+      "grad_norm": 0.9615259872002241,
+      "learning_rate": 0.003,
+      "loss": 4.1018,
+      "step": 5234
+    },
+    {
+      "epoch": 0.05235,
+      "grad_norm": 0.983028393165795,
+      "learning_rate": 0.003,
+      "loss": 4.0777,
+      "step": 5235
+    },
+    {
+      "epoch": 0.05236,
+      "grad_norm": 1.0016310432668707,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 5236
+    },
+    {
+      "epoch": 0.05237,
+      "grad_norm": 1.015432602850747,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 5237
+    },
+    {
+      "epoch": 0.05238,
+      "grad_norm": 1.078845385798474,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 5238
+    },
+    {
+      "epoch": 0.05239,
+      "grad_norm": 0.9307396208378222,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 5239
+    },
+    {
+      "epoch": 0.0524,
+      "grad_norm": 0.9571027247324106,
+      "learning_rate": 0.003,
+      "loss": 4.0895,
+      "step": 5240
+    },
+    {
+      "epoch": 0.05241,
+      "grad_norm": 0.9792332364709184,
+      "learning_rate": 0.003,
+      "loss": 4.0927,
+      "step": 5241
+    },
+    {
+      "epoch": 0.05242,
+      "grad_norm": 0.9182971281795369,
+      "learning_rate": 0.003,
+      "loss": 4.0894,
+      "step": 5242
+    },
+    {
+      "epoch": 0.05243,
+      "grad_norm": 0.8942676834653476,
+      "learning_rate": 0.003,
+      "loss": 4.082,
+      "step": 5243
+    },
+    {
+      "epoch": 0.05244,
+      "grad_norm": 0.89594845212589,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 5244
+    },
+    {
+      "epoch": 0.05245,
+      "grad_norm": 1.0089126308943768,
+      "learning_rate": 0.003,
+      "loss": 4.0668,
+      "step": 5245
+    },
+    {
+      "epoch": 0.05246,
+      "grad_norm": 1.192205008646422,
+      "learning_rate": 0.003,
+      "loss": 4.0777,
+      "step": 5246
+    },
+    {
+      "epoch": 0.05247,
+      "grad_norm": 1.0822271048921281,
+      "learning_rate": 0.003,
+      "loss": 4.0762,
+      "step": 5247
+    },
+    {
+      "epoch": 0.05248,
+      "grad_norm": 0.9132052877500991,
+      "learning_rate": 0.003,
+      "loss": 4.0792,
+      "step": 5248
+    },
+    {
+      "epoch": 0.05249,
+      "grad_norm": 1.0439345558274598,
+      "learning_rate": 0.003,
+      "loss": 4.0939,
+      "step": 5249
+    },
+    {
+      "epoch": 0.0525,
+      "grad_norm": 1.1063976766968928,
+      "learning_rate": 0.003,
+      "loss": 4.0888,
+      "step": 5250
+    },
+    {
+      "epoch": 0.05251,
+      "grad_norm": 0.8427299394254306,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 5251
+    },
+    {
+      "epoch": 0.05252,
+      "grad_norm": 0.8460576745435954,
+      "learning_rate": 0.003,
+      "loss": 4.0952,
+      "step": 5252
+    },
+    {
+      "epoch": 0.05253,
+      "grad_norm": 0.7631623954154223,
+      "learning_rate": 0.003,
+      "loss": 4.0598,
+      "step": 5253
+    },
+    {
+      "epoch": 0.05254,
+      "grad_norm": 0.7582085848780449,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 5254
+    },
+    {
+      "epoch": 0.05255,
+      "grad_norm": 0.871269617254244,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 5255
+    },
+    {
+      "epoch": 0.05256,
+      "grad_norm": 1.1274865245345602,
+      "learning_rate": 0.003,
+      "loss": 4.0815,
+      "step": 5256
+    },
+    {
+      "epoch": 0.05257,
+      "grad_norm": 0.9482986096135555,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 5257
+    },
+    {
+      "epoch": 0.05258,
+      "grad_norm": 0.8120437150184056,
+      "learning_rate": 0.003,
+      "loss": 4.0675,
+      "step": 5258
+    },
+    {
+      "epoch": 0.05259,
+      "grad_norm": 0.7370127557005074,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 5259
+    },
+    {
+      "epoch": 0.0526,
+      "grad_norm": 0.8055148797864925,
+      "learning_rate": 0.003,
+      "loss": 4.0816,
+      "step": 5260
+    },
+    {
+      "epoch": 0.05261,
+      "grad_norm": 0.7414676121854172,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 5261
+    },
+    {
+      "epoch": 0.05262,
+      "grad_norm": 0.8431409042373192,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 5262
+    },
+    {
+      "epoch": 0.05263,
+      "grad_norm": 0.9936473719599972,
+      "learning_rate": 0.003,
+      "loss": 4.0923,
+      "step": 5263
+    },
+    {
+      "epoch": 0.05264,
+      "grad_norm": 1.2454811247723998,
+      "learning_rate": 0.003,
+      "loss": 4.1006,
+      "step": 5264
+    },
+    {
+      "epoch": 0.05265,
+      "grad_norm": 0.7140907414977667,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 5265
+    },
+    {
+      "epoch": 0.05266,
+      "grad_norm": 0.6844293037667735,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 5266
+    },
+    {
+      "epoch": 0.05267,
+      "grad_norm": 0.7232891737003544,
+      "learning_rate": 0.003,
+      "loss": 4.0891,
+      "step": 5267
+    },
+    {
+      "epoch": 0.05268,
+      "grad_norm": 0.8650861484246375,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 5268
+    },
+    {
+      "epoch": 0.05269,
+      "grad_norm": 1.078417956716715,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 5269
+    },
+    {
+      "epoch": 0.0527,
+      "grad_norm": 1.081957308191504,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 5270
+    },
+    {
+      "epoch": 0.05271,
+      "grad_norm": 1.0713979709889552,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 5271
+    },
+    {
+      "epoch": 0.05272,
+      "grad_norm": 0.9799954386145029,
+      "learning_rate": 0.003,
+      "loss": 4.0932,
+      "step": 5272
+    },
+    {
+      "epoch": 0.05273,
+      "grad_norm": 1.1944522490266327,
+      "learning_rate": 0.003,
+      "loss": 4.0858,
+      "step": 5273
+    },
+    {
+      "epoch": 0.05274,
+      "grad_norm": 0.9595893728165487,
+      "learning_rate": 0.003,
+      "loss": 4.0886,
+      "step": 5274
+    },
+    {
+      "epoch": 0.05275,
+      "grad_norm": 1.0373746989136285,
+      "learning_rate": 0.003,
+      "loss": 4.0638,
+      "step": 5275
+    },
+    {
+      "epoch": 0.05276,
+      "grad_norm": 0.9783693469288199,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 5276
+    },
+    {
+      "epoch": 0.05277,
+      "grad_norm": 0.9308459462602469,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 5277
+    },
+    {
+      "epoch": 0.05278,
+      "grad_norm": 0.9127924883155014,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 5278
+    },
+    {
+      "epoch": 0.05279,
+      "grad_norm": 0.8905127840769295,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 5279
+    },
+    {
+      "epoch": 0.0528,
+      "grad_norm": 0.9160560894552265,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 5280
+    },
+    {
+      "epoch": 0.05281,
+      "grad_norm": 0.9898967226775388,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 5281
+    },
+    {
+      "epoch": 0.05282,
+      "grad_norm": 1.0607654232659915,
+      "learning_rate": 0.003,
+      "loss": 4.0717,
+      "step": 5282
+    },
+    {
+      "epoch": 0.05283,
+      "grad_norm": 0.7891736411399345,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 5283
+    },
+    {
+      "epoch": 0.05284,
+      "grad_norm": 0.7117985343229531,
+      "learning_rate": 0.003,
+      "loss": 4.0763,
+      "step": 5284
+    },
+    {
+      "epoch": 0.05285,
+      "grad_norm": 0.7116143826691809,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 5285
+    },
+    {
+      "epoch": 0.05286,
+      "grad_norm": 0.7444977551621328,
+      "learning_rate": 0.003,
+      "loss": 4.0924,
+      "step": 5286
+    },
+    {
+      "epoch": 0.05287,
+      "grad_norm": 0.7927132129772895,
+      "learning_rate": 0.003,
+      "loss": 4.0621,
+      "step": 5287
+    },
+    {
+      "epoch": 0.05288,
+      "grad_norm": 0.8599837583361685,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 5288
+    },
+    {
+      "epoch": 0.05289,
+      "grad_norm": 1.030013828539183,
+      "learning_rate": 0.003,
+      "loss": 4.0765,
+      "step": 5289
+    },
+    {
+      "epoch": 0.0529,
+      "grad_norm": 1.320070502048142,
+      "learning_rate": 0.003,
+      "loss": 4.0834,
+      "step": 5290
+    },
+    {
+      "epoch": 0.05291,
+      "grad_norm": 0.708003930048573,
+      "learning_rate": 0.003,
+      "loss": 4.083,
+      "step": 5291
+    },
+    {
+      "epoch": 0.05292,
+      "grad_norm": 0.7099716170045386,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 5292
+    },
+    {
+      "epoch": 0.05293,
+      "grad_norm": 1.0571312622562001,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 5293
+    },
+    {
+      "epoch": 0.05294,
+      "grad_norm": 1.0749670114406467,
+      "learning_rate": 0.003,
+      "loss": 4.063,
+      "step": 5294
+    },
+    {
+      "epoch": 0.05295,
+      "grad_norm": 0.9466743696328925,
+      "learning_rate": 0.003,
+      "loss": 4.0777,
+      "step": 5295
+    },
+    {
+      "epoch": 0.05296,
+      "grad_norm": 0.9334033172520125,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 5296
+    },
+    {
+      "epoch": 0.05297,
+      "grad_norm": 0.9085832862861088,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 5297
+    },
+    {
+      "epoch": 0.05298,
+      "grad_norm": 0.8733996457415404,
+      "learning_rate": 0.003,
+      "loss": 4.0767,
+      "step": 5298
+    },
+    {
+      "epoch": 0.05299,
+      "grad_norm": 0.9180130832605389,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 5299
+    },
+    {
+      "epoch": 0.053,
+      "grad_norm": 0.9730509978164584,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 5300
+    },
+    {
+      "epoch": 0.05301,
+      "grad_norm": 1.0895476694519508,
+      "learning_rate": 0.003,
+      "loss": 4.09,
+      "step": 5301
+    },
+    {
+      "epoch": 0.05302,
+      "grad_norm": 1.0558202746074077,
+      "learning_rate": 0.003,
+      "loss": 4.0643,
+      "step": 5302
+    },
+    {
+      "epoch": 0.05303,
+      "grad_norm": 0.9797012960446656,
+      "learning_rate": 0.003,
+      "loss": 4.0756,
+      "step": 5303
+    },
+    {
+      "epoch": 0.05304,
+      "grad_norm": 1.0039073817988884,
+      "learning_rate": 0.003,
+      "loss": 4.0642,
+      "step": 5304
+    },
+    {
+      "epoch": 0.05305,
+      "grad_norm": 1.1408892390131204,
+      "learning_rate": 0.003,
+      "loss": 4.0809,
+      "step": 5305
+    },
+    {
+      "epoch": 0.05306,
+      "grad_norm": 0.93995449289434,
+      "learning_rate": 0.003,
+      "loss": 4.0814,
+      "step": 5306
+    },
+    {
+      "epoch": 0.05307,
+      "grad_norm": 0.9726004341078571,
+      "learning_rate": 0.003,
+      "loss": 4.0793,
+      "step": 5307
+    },
+    {
+      "epoch": 0.05308,
+      "grad_norm": 0.9846823344904868,
+      "learning_rate": 0.003,
+      "loss": 4.0905,
+      "step": 5308
+    },
+    {
+      "epoch": 0.05309,
+      "grad_norm": 1.1645641513184255,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 5309
+    },
+    {
+      "epoch": 0.0531,
+      "grad_norm": 0.9255234669205171,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 5310
+    },
+    {
+      "epoch": 0.05311,
+      "grad_norm": 0.9954755426824138,
+      "learning_rate": 0.003,
+      "loss": 4.1049,
+      "step": 5311
+    },
+    {
+      "epoch": 0.05312,
+      "grad_norm": 1.140912604498098,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 5312
+    },
+    {
+      "epoch": 0.05313,
+      "grad_norm": 1.0342859706036291,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 5313
+    },
+    {
+      "epoch": 0.05314,
+      "grad_norm": 1.0050903165394378,
+      "learning_rate": 0.003,
+      "loss": 4.0762,
+      "step": 5314
+    },
+    {
+      "epoch": 0.05315,
+      "grad_norm": 1.082132110001949,
+      "learning_rate": 0.003,
+      "loss": 4.0846,
+      "step": 5315
+    },
+    {
+      "epoch": 0.05316,
+      "grad_norm": 0.8775743349709987,
+      "learning_rate": 0.003,
+      "loss": 4.0834,
+      "step": 5316
+    },
+    {
+      "epoch": 0.05317,
+      "grad_norm": 0.7524051363352736,
+      "learning_rate": 0.003,
+      "loss": 4.0802,
+      "step": 5317
+    },
+    {
+      "epoch": 0.05318,
+      "grad_norm": 0.6968397634473481,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 5318
+    },
+    {
+      "epoch": 0.05319,
+      "grad_norm": 0.7303279353638819,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 5319
+    },
+    {
+      "epoch": 0.0532,
+      "grad_norm": 0.8312560764557734,
+      "learning_rate": 0.003,
+      "loss": 4.0663,
+      "step": 5320
+    },
+    {
+      "epoch": 0.05321,
+      "grad_norm": 0.9440547584257499,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 5321
+    },
+    {
+      "epoch": 0.05322,
+      "grad_norm": 1.0145241749604252,
+      "learning_rate": 0.003,
+      "loss": 4.0914,
+      "step": 5322
+    },
+    {
+      "epoch": 0.05323,
+      "grad_norm": 1.0026467673881212,
+      "learning_rate": 0.003,
+      "loss": 4.1116,
+      "step": 5323
+    },
+    {
+      "epoch": 0.05324,
+      "grad_norm": 0.9550475870668266,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 5324
+    },
+    {
+      "epoch": 0.05325,
+      "grad_norm": 1.0200704518446446,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 5325
+    },
+    {
+      "epoch": 0.05326,
+      "grad_norm": 0.9264156483614246,
+      "learning_rate": 0.003,
+      "loss": 4.095,
+      "step": 5326
+    },
+    {
+      "epoch": 0.05327,
+      "grad_norm": 0.9074863872575771,
+      "learning_rate": 0.003,
+      "loss": 4.0743,
+      "step": 5327
+    },
+    {
+      "epoch": 0.05328,
+      "grad_norm": 0.9484736745601532,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 5328
+    },
+    {
+      "epoch": 0.05329,
+      "grad_norm": 1.1056353318349814,
+      "learning_rate": 0.003,
+      "loss": 4.076,
+      "step": 5329
+    },
+    {
+      "epoch": 0.0533,
+      "grad_norm": 0.8601194407643497,
+      "learning_rate": 0.003,
+      "loss": 4.0871,
+      "step": 5330
+    },
+    {
+      "epoch": 0.05331,
+      "grad_norm": 0.8648893165616912,
+      "learning_rate": 0.003,
+      "loss": 4.0762,
+      "step": 5331
+    },
+    {
+      "epoch": 0.05332,
+      "grad_norm": 1.0628193250784792,
+      "learning_rate": 0.003,
+      "loss": 4.0971,
+      "step": 5332
+    },
+    {
+      "epoch": 0.05333,
+      "grad_norm": 1.1066111469551312,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 5333
+    },
+    {
+      "epoch": 0.05334,
+      "grad_norm": 0.953883540436792,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 5334
+    },
+    {
+      "epoch": 0.05335,
+      "grad_norm": 0.9008830143608041,
+      "learning_rate": 0.003,
+      "loss": 4.0857,
+      "step": 5335
+    },
+    {
+      "epoch": 0.05336,
+      "grad_norm": 0.8996222966966763,
+      "learning_rate": 0.003,
+      "loss": 4.0943,
+      "step": 5336
+    },
+    {
+      "epoch": 0.05337,
+      "grad_norm": 0.9470909772908184,
+      "learning_rate": 0.003,
+      "loss": 4.0839,
+      "step": 5337
+    },
+    {
+      "epoch": 0.05338,
+      "grad_norm": 0.9368087966120492,
+      "learning_rate": 0.003,
+      "loss": 4.0689,
+      "step": 5338
+    },
+    {
+      "epoch": 0.05339,
+      "grad_norm": 0.9839774636806952,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 5339
+    },
+    {
+      "epoch": 0.0534,
+      "grad_norm": 1.10664494283966,
+      "learning_rate": 0.003,
+      "loss": 4.0807,
+      "step": 5340
+    },
+    {
+      "epoch": 0.05341,
+      "grad_norm": 1.0229490590816641,
+      "learning_rate": 0.003,
+      "loss": 4.0794,
+      "step": 5341
+    },
+    {
+      "epoch": 0.05342,
+      "grad_norm": 1.0465584419606189,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 5342
+    },
+    {
+      "epoch": 0.05343,
+      "grad_norm": 1.1640384929974874,
+      "learning_rate": 0.003,
+      "loss": 4.086,
+      "step": 5343
+    },
+    {
+      "epoch": 0.05344,
+      "grad_norm": 0.884957738973249,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 5344
+    },
+    {
+      "epoch": 0.05345,
+      "grad_norm": 0.8433240280430108,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 5345
+    },
+    {
+      "epoch": 0.05346,
+      "grad_norm": 0.9566468834365702,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 5346
+    },
+    {
+      "epoch": 0.05347,
+      "grad_norm": 0.9368336590324607,
+      "learning_rate": 0.003,
+      "loss": 4.0619,
+      "step": 5347
+    },
+    {
+      "epoch": 0.05348,
+      "grad_norm": 1.0141732765839058,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 5348
+    },
+    {
+      "epoch": 0.05349,
+      "grad_norm": 1.154078618191757,
+      "learning_rate": 0.003,
+      "loss": 4.0792,
+      "step": 5349
+    },
+    {
+      "epoch": 0.0535,
+      "grad_norm": 1.0175917042790932,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 5350
+    },
+    {
+      "epoch": 0.05351,
+      "grad_norm": 0.8496580619444323,
+      "learning_rate": 0.003,
+      "loss": 4.0623,
+      "step": 5351
+    },
+    {
+      "epoch": 0.05352,
+      "grad_norm": 0.8895367323216903,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 5352
+    },
+    {
+      "epoch": 0.05353,
+      "grad_norm": 0.9383414607042285,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 5353
+    },
+    {
+      "epoch": 0.05354,
+      "grad_norm": 0.9932999421264617,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 5354
+    },
+    {
+      "epoch": 0.05355,
+      "grad_norm": 1.1090959785058532,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 5355
+    },
+    {
+      "epoch": 0.05356,
+      "grad_norm": 0.9113253650658273,
+      "learning_rate": 0.003,
+      "loss": 4.089,
+      "step": 5356
+    },
+    {
+      "epoch": 0.05357,
+      "grad_norm": 0.9713969124835692,
+      "learning_rate": 0.003,
+      "loss": 4.0763,
+      "step": 5357
+    },
+    {
+      "epoch": 0.05358,
+      "grad_norm": 1.1111797677781718,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 5358
+    },
+    {
+      "epoch": 0.05359,
+      "grad_norm": 1.12964921180821,
+      "learning_rate": 0.003,
+      "loss": 4.0863,
+      "step": 5359
+    },
+    {
+      "epoch": 0.0536,
+      "grad_norm": 0.7734821023817453,
+      "learning_rate": 0.003,
+      "loss": 4.0673,
+      "step": 5360
+    },
+    {
+      "epoch": 0.05361,
+      "grad_norm": 0.8519551241940354,
+      "learning_rate": 0.003,
+      "loss": 4.0789,
+      "step": 5361
+    },
+    {
+      "epoch": 0.05362,
+      "grad_norm": 0.9322670717841903,
+      "learning_rate": 0.003,
+      "loss": 4.0897,
+      "step": 5362
+    },
+    {
+      "epoch": 0.05363,
+      "grad_norm": 1.0074134211588726,
+      "learning_rate": 0.003,
+      "loss": 4.0958,
+      "step": 5363
+    },
+    {
+      "epoch": 0.05364,
+      "grad_norm": 0.9873618582440303,
+      "learning_rate": 0.003,
+      "loss": 4.09,
+      "step": 5364
+    },
+    {
+      "epoch": 0.05365,
+      "grad_norm": 0.9112488220616313,
+      "learning_rate": 0.003,
+      "loss": 4.0814,
+      "step": 5365
+    },
+    {
+      "epoch": 0.05366,
+      "grad_norm": 0.9087748964782536,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 5366
+    },
+    {
+      "epoch": 0.05367,
+      "grad_norm": 1.1130213121604795,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 5367
+    },
+    {
+      "epoch": 0.05368,
+      "grad_norm": 1.051640889768411,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 5368
+    },
+    {
+      "epoch": 0.05369,
+      "grad_norm": 0.9572517998403266,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 5369
+    },
+    {
+      "epoch": 0.0537,
+      "grad_norm": 1.132811606167689,
+      "learning_rate": 0.003,
+      "loss": 4.077,
+      "step": 5370
+    },
+    {
+      "epoch": 0.05371,
+      "grad_norm": 0.9187614369569057,
+      "learning_rate": 0.003,
+      "loss": 4.0915,
+      "step": 5371
+    },
+    {
+      "epoch": 0.05372,
+      "grad_norm": 1.0470001010321264,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 5372
+    },
+    {
+      "epoch": 0.05373,
+      "grad_norm": 0.9617588492397875,
+      "learning_rate": 0.003,
+      "loss": 4.0878,
+      "step": 5373
+    },
+    {
+      "epoch": 0.05374,
+      "grad_norm": 0.9433193815843771,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 5374
+    },
+    {
+      "epoch": 0.05375,
+      "grad_norm": 0.9043465599508047,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 5375
+    },
+    {
+      "epoch": 0.05376,
+      "grad_norm": 0.8650017321204917,
+      "learning_rate": 0.003,
+      "loss": 4.0858,
+      "step": 5376
+    },
+    {
+      "epoch": 0.05377,
+      "grad_norm": 0.8366118978178542,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 5377
+    },
+    {
+      "epoch": 0.05378,
+      "grad_norm": 0.9549435064058331,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 5378
+    },
+    {
+      "epoch": 0.05379,
+      "grad_norm": 1.1106796516282058,
+      "learning_rate": 0.003,
+      "loss": 4.0908,
+      "step": 5379
+    },
+    {
+      "epoch": 0.0538,
+      "grad_norm": 0.831546358079104,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 5380
+    },
+    {
+      "epoch": 0.05381,
+      "grad_norm": 0.7873840620558364,
+      "learning_rate": 0.003,
+      "loss": 4.0624,
+      "step": 5381
+    },
+    {
+      "epoch": 0.05382,
+      "grad_norm": 0.7745681783586985,
+      "learning_rate": 0.003,
+      "loss": 4.0811,
+      "step": 5382
+    },
+    {
+      "epoch": 0.05383,
+      "grad_norm": 0.7612145463814121,
+      "learning_rate": 0.003,
+      "loss": 4.0764,
+      "step": 5383
+    },
+    {
+      "epoch": 0.05384,
+      "grad_norm": 0.8198652694354317,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 5384
+    },
+    {
+      "epoch": 0.05385,
+      "grad_norm": 0.9417575755533557,
+      "learning_rate": 0.003,
+      "loss": 4.0871,
+      "step": 5385
+    },
+    {
+      "epoch": 0.05386,
+      "grad_norm": 0.9333515318457634,
+      "learning_rate": 0.003,
+      "loss": 4.0683,
+      "step": 5386
+    },
+    {
+      "epoch": 0.05387,
+      "grad_norm": 0.9470842553395012,
+      "learning_rate": 0.003,
+      "loss": 4.0524,
+      "step": 5387
+    },
+    {
+      "epoch": 0.05388,
+      "grad_norm": 1.0202541931803695,
+      "learning_rate": 0.003,
+      "loss": 4.0944,
+      "step": 5388
+    },
+    {
+      "epoch": 0.05389,
+      "grad_norm": 1.0837366526435621,
+      "learning_rate": 0.003,
+      "loss": 4.0717,
+      "step": 5389
+    },
+    {
+      "epoch": 0.0539,
+      "grad_norm": 0.8073770997786047,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 5390
+    },
+    {
+      "epoch": 0.05391,
+      "grad_norm": 0.7682773134551675,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 5391
+    },
+    {
+      "epoch": 0.05392,
+      "grad_norm": 0.7018203354535223,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 5392
+    },
+    {
+      "epoch": 0.05393,
+      "grad_norm": 0.8253528505413662,
+      "learning_rate": 0.003,
+      "loss": 4.0812,
+      "step": 5393
+    },
+    {
+      "epoch": 0.05394,
+      "grad_norm": 0.8821082824869386,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 5394
+    },
+    {
+      "epoch": 0.05395,
+      "grad_norm": 1.0233479675976396,
+      "learning_rate": 0.003,
+      "loss": 4.0882,
+      "step": 5395
+    },
+    {
+      "epoch": 0.05396,
+      "grad_norm": 0.9868685749565336,
+      "learning_rate": 0.003,
+      "loss": 4.0791,
+      "step": 5396
+    },
+    {
+      "epoch": 0.05397,
+      "grad_norm": 0.9032364856754863,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 5397
+    },
+    {
+      "epoch": 0.05398,
+      "grad_norm": 0.9305986337257256,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 5398
+    },
+    {
+      "epoch": 0.05399,
+      "grad_norm": 0.943643567039578,
+      "learning_rate": 0.003,
+      "loss": 4.0821,
+      "step": 5399
+    },
+    {
+      "epoch": 0.054,
+      "grad_norm": 0.9727836672752513,
+      "learning_rate": 0.003,
+      "loss": 4.0924,
+      "step": 5400
+    },
+    {
+      "epoch": 0.05401,
+      "grad_norm": 1.0950746328718843,
+      "learning_rate": 0.003,
+      "loss": 4.0757,
+      "step": 5401
+    },
+    {
+      "epoch": 0.05402,
+      "grad_norm": 0.9394359475802941,
+      "learning_rate": 0.003,
+      "loss": 4.0828,
+      "step": 5402
+    },
+    {
+      "epoch": 0.05403,
+      "grad_norm": 1.100262618217403,
+      "learning_rate": 0.003,
+      "loss": 4.0753,
+      "step": 5403
+    },
+    {
+      "epoch": 0.05404,
+      "grad_norm": 0.8289796141686676,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 5404
+    },
+    {
+      "epoch": 0.05405,
+      "grad_norm": 0.8162230398030023,
+      "learning_rate": 0.003,
+      "loss": 4.0671,
+      "step": 5405
+    },
+    {
+      "epoch": 0.05406,
+      "grad_norm": 0.7883046496886217,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 5406
+    },
+    {
+      "epoch": 0.05407,
+      "grad_norm": 0.7936421744252384,
+      "learning_rate": 0.003,
+      "loss": 4.0599,
+      "step": 5407
+    },
+    {
+      "epoch": 0.05408,
+      "grad_norm": 0.9053334375282556,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 5408
+    },
+    {
+      "epoch": 0.05409,
+      "grad_norm": 0.9451799555181426,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 5409
+    },
+    {
+      "epoch": 0.0541,
+      "grad_norm": 1.0666670568701657,
+      "learning_rate": 0.003,
+      "loss": 4.0735,
+      "step": 5410
+    },
+    {
+      "epoch": 0.05411,
+      "grad_norm": 0.9181326660070915,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 5411
+    },
+    {
+      "epoch": 0.05412,
+      "grad_norm": 1.021682961374821,
+      "learning_rate": 0.003,
+      "loss": 4.0665,
+      "step": 5412
+    },
+    {
+      "epoch": 0.05413,
+      "grad_norm": 1.1798050564607474,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 5413
+    },
+    {
+      "epoch": 0.05414,
+      "grad_norm": 0.9232912835820161,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 5414
+    },
+    {
+      "epoch": 0.05415,
+      "grad_norm": 0.8725384280761546,
+      "learning_rate": 0.003,
+      "loss": 4.0803,
+      "step": 5415
+    },
+    {
+      "epoch": 0.05416,
+      "grad_norm": 0.8010341905607388,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 5416
+    },
+    {
+      "epoch": 0.05417,
+      "grad_norm": 0.9197583841797057,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 5417
+    },
+    {
+      "epoch": 0.05418,
+      "grad_norm": 0.9755108515112911,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 5418
+    },
+    {
+      "epoch": 0.05419,
+      "grad_norm": 0.9843859573873037,
+      "learning_rate": 0.003,
+      "loss": 4.0759,
+      "step": 5419
+    },
+    {
+      "epoch": 0.0542,
+      "grad_norm": 1.1783693707948757,
+      "learning_rate": 0.003,
+      "loss": 4.0721,
+      "step": 5420
+    },
+    {
+      "epoch": 0.05421,
+      "grad_norm": 1.0734083109765062,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 5421
+    },
+    {
+      "epoch": 0.05422,
+      "grad_norm": 0.980524671161552,
+      "learning_rate": 0.003,
+      "loss": 4.0763,
+      "step": 5422
+    },
+    {
+      "epoch": 0.05423,
+      "grad_norm": 1.134071438565773,
+      "learning_rate": 0.003,
+      "loss": 4.0724,
+      "step": 5423
+    },
+    {
+      "epoch": 0.05424,
+      "grad_norm": 1.2330179209043135,
+      "learning_rate": 0.003,
+      "loss": 4.1119,
+      "step": 5424
+    },
+    {
+      "epoch": 0.05425,
+      "grad_norm": 1.013866607135996,
+      "learning_rate": 0.003,
+      "loss": 4.0789,
+      "step": 5425
+    },
+    {
+      "epoch": 0.05426,
+      "grad_norm": 1.1436320829532476,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 5426
+    },
+    {
+      "epoch": 0.05427,
+      "grad_norm": 0.8459842849855579,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 5427
+    },
+    {
+      "epoch": 0.05428,
+      "grad_norm": 0.9549700978254552,
+      "learning_rate": 0.003,
+      "loss": 4.0747,
+      "step": 5428
+    },
+    {
+      "epoch": 0.05429,
+      "grad_norm": 0.9172026670565415,
+      "learning_rate": 0.003,
+      "loss": 4.077,
+      "step": 5429
+    },
+    {
+      "epoch": 0.0543,
+      "grad_norm": 1.120952695970183,
+      "learning_rate": 0.003,
+      "loss": 4.0999,
+      "step": 5430
+    },
+    {
+      "epoch": 0.05431,
+      "grad_norm": 1.1974245320079564,
+      "learning_rate": 0.003,
+      "loss": 4.0746,
+      "step": 5431
+    },
+    {
+      "epoch": 0.05432,
+      "grad_norm": 0.8852096173514169,
+      "learning_rate": 0.003,
+      "loss": 4.0639,
+      "step": 5432
+    },
+    {
+      "epoch": 0.05433,
+      "grad_norm": 0.9401011460255838,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 5433
+    },
+    {
+      "epoch": 0.05434,
+      "grad_norm": 0.9678587871600712,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 5434
+    },
+    {
+      "epoch": 0.05435,
+      "grad_norm": 1.0899343219435063,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 5435
+    },
+    {
+      "epoch": 0.05436,
+      "grad_norm": 1.0563921820873317,
+      "learning_rate": 0.003,
+      "loss": 4.0824,
+      "step": 5436
+    },
+    {
+      "epoch": 0.05437,
+      "grad_norm": 1.0856872480417759,
+      "learning_rate": 0.003,
+      "loss": 4.0848,
+      "step": 5437
+    },
+    {
+      "epoch": 0.05438,
+      "grad_norm": 0.7984063525797823,
+      "learning_rate": 0.003,
+      "loss": 4.0697,
+      "step": 5438
+    },
+    {
+      "epoch": 0.05439,
+      "grad_norm": 0.7454173407684945,
+      "learning_rate": 0.003,
+      "loss": 4.0604,
+      "step": 5439
+    },
+    {
+      "epoch": 0.0544,
+      "grad_norm": 0.7921737605396232,
+      "learning_rate": 0.003,
+      "loss": 4.0538,
+      "step": 5440
+    },
+    {
+      "epoch": 0.05441,
+      "grad_norm": 1.0225413249269952,
+      "learning_rate": 0.003,
+      "loss": 4.0923,
+      "step": 5441
+    },
+    {
+      "epoch": 0.05442,
+      "grad_norm": 1.2118058028339453,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 5442
+    },
+    {
+      "epoch": 0.05443,
+      "grad_norm": 0.6321819000655003,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 5443
+    },
+    {
+      "epoch": 0.05444,
+      "grad_norm": 0.8027542268287786,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 5444
+    },
+    {
+      "epoch": 0.05445,
+      "grad_norm": 0.8735412506958715,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 5445
+    },
+    {
+      "epoch": 0.05446,
+      "grad_norm": 0.7936222595707908,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 5446
+    },
+    {
+      "epoch": 0.05447,
+      "grad_norm": 0.6924205266934994,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 5447
+    },
+    {
+      "epoch": 0.05448,
+      "grad_norm": 0.6812700924878988,
+      "learning_rate": 0.003,
+      "loss": 4.0836,
+      "step": 5448
+    },
+    {
+      "epoch": 0.05449,
+      "grad_norm": 0.7401926125886807,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 5449
+    },
+    {
+      "epoch": 0.0545,
+      "grad_norm": 0.8398967204050066,
+      "learning_rate": 0.003,
+      "loss": 4.0719,
+      "step": 5450
+    },
+    {
+      "epoch": 0.05451,
+      "grad_norm": 0.9232648502496631,
+      "learning_rate": 0.003,
+      "loss": 4.0973,
+      "step": 5451
+    },
+    {
+      "epoch": 0.05452,
+      "grad_norm": 1.018982529104012,
+      "learning_rate": 0.003,
+      "loss": 4.0663,
+      "step": 5452
+    },
+    {
+      "epoch": 0.05453,
+      "grad_norm": 1.09144709823669,
+      "learning_rate": 0.003,
+      "loss": 4.0627,
+      "step": 5453
+    },
+    {
+      "epoch": 0.05454,
+      "grad_norm": 0.9389422208676496,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 5454
+    },
+    {
+      "epoch": 0.05455,
+      "grad_norm": 1.011860130767919,
+      "learning_rate": 0.003,
+      "loss": 4.0927,
+      "step": 5455
+    },
+    {
+      "epoch": 0.05456,
+      "grad_norm": 1.1005201477477284,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 5456
+    },
+    {
+      "epoch": 0.05457,
+      "grad_norm": 0.8394502161551594,
+      "learning_rate": 0.003,
+      "loss": 4.0859,
+      "step": 5457
+    },
+    {
+      "epoch": 0.05458,
+      "grad_norm": 0.8522093521223234,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 5458
+    },
+    {
+      "epoch": 0.05459,
+      "grad_norm": 0.8521617398500968,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 5459
+    },
+    {
+      "epoch": 0.0546,
+      "grad_norm": 0.9898185774528117,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 5460
+    },
+    {
+      "epoch": 0.05461,
+      "grad_norm": 1.037914610295231,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 5461
+    },
+    {
+      "epoch": 0.05462,
+      "grad_norm": 1.0314633015920627,
+      "learning_rate": 0.003,
+      "loss": 4.099,
+      "step": 5462
+    },
+    {
+      "epoch": 0.05463,
+      "grad_norm": 1.0714702174113098,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 5463
+    },
+    {
+      "epoch": 0.05464,
+      "grad_norm": 1.0656649423089701,
+      "learning_rate": 0.003,
+      "loss": 4.0999,
+      "step": 5464
+    },
+    {
+      "epoch": 0.05465,
+      "grad_norm": 1.0070370867844776,
+      "learning_rate": 0.003,
+      "loss": 4.0936,
+      "step": 5465
+    },
+    {
+      "epoch": 0.05466,
+      "grad_norm": 0.971949088054457,
+      "learning_rate": 0.003,
+      "loss": 4.1024,
+      "step": 5466
+    },
+    {
+      "epoch": 0.05467,
+      "grad_norm": 1.1027271603054372,
+      "learning_rate": 0.003,
+      "loss": 4.0866,
+      "step": 5467
+    },
+    {
+      "epoch": 0.05468,
+      "grad_norm": 0.9722631661342838,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 5468
+    },
+    {
+      "epoch": 0.05469,
+      "grad_norm": 1.535529064177968,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 5469
+    },
+    {
+      "epoch": 0.0547,
+      "grad_norm": 0.8945992610917329,
+      "learning_rate": 0.003,
+      "loss": 4.0611,
+      "step": 5470
+    },
+    {
+      "epoch": 0.05471,
+      "grad_norm": 0.8971438224187216,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 5471
+    },
+    {
+      "epoch": 0.05472,
+      "grad_norm": 0.785537906391861,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 5472
+    },
+    {
+      "epoch": 0.05473,
+      "grad_norm": 0.7645096419487161,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 5473
+    },
+    {
+      "epoch": 0.05474,
+      "grad_norm": 0.9043493315770855,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 5474
+    },
+    {
+      "epoch": 0.05475,
+      "grad_norm": 0.9826087228695892,
+      "learning_rate": 0.003,
+      "loss": 4.1121,
+      "step": 5475
+    },
+    {
+      "epoch": 0.05476,
+      "grad_norm": 1.0455880982626005,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 5476
+    },
+    {
+      "epoch": 0.05477,
+      "grad_norm": 1.0460718667427993,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 5477
+    },
+    {
+      "epoch": 0.05478,
+      "grad_norm": 0.9784914027077458,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 5478
+    },
+    {
+      "epoch": 0.05479,
+      "grad_norm": 1.06468120790793,
+      "learning_rate": 0.003,
+      "loss": 4.0835,
+      "step": 5479
+    },
+    {
+      "epoch": 0.0548,
+      "grad_norm": 0.9724427087606514,
+      "learning_rate": 0.003,
+      "loss": 4.08,
+      "step": 5480
+    },
+    {
+      "epoch": 0.05481,
+      "grad_norm": 1.139450823817563,
+      "learning_rate": 0.003,
+      "loss": 4.0741,
+      "step": 5481
+    },
+    {
+      "epoch": 0.05482,
+      "grad_norm": 1.0103917095506671,
+      "learning_rate": 0.003,
+      "loss": 4.0652,
+      "step": 5482
+    },
+    {
+      "epoch": 0.05483,
+      "grad_norm": 0.9941838375298019,
+      "learning_rate": 0.003,
+      "loss": 4.0665,
+      "step": 5483
+    },
+    {
+      "epoch": 0.05484,
+      "grad_norm": 1.0065630407668353,
+      "learning_rate": 0.003,
+      "loss": 4.0901,
+      "step": 5484
+    },
+    {
+      "epoch": 0.05485,
+      "grad_norm": 1.1859794607003078,
+      "learning_rate": 0.003,
+      "loss": 4.1009,
+      "step": 5485
+    },
+    {
+      "epoch": 0.05486,
+      "grad_norm": 0.9117774295489004,
+      "learning_rate": 0.003,
+      "loss": 4.0766,
+      "step": 5486
+    },
+    {
+      "epoch": 0.05487,
+      "grad_norm": 1.0354321077724413,
+      "learning_rate": 0.003,
+      "loss": 4.0847,
+      "step": 5487
+    },
+    {
+      "epoch": 0.05488,
+      "grad_norm": 0.9520742156522823,
+      "learning_rate": 0.003,
+      "loss": 4.0785,
+      "step": 5488
+    },
+    {
+      "epoch": 0.05489,
+      "grad_norm": 0.9583850608961483,
+      "learning_rate": 0.003,
+      "loss": 4.1004,
+      "step": 5489
+    },
+    {
+      "epoch": 0.0549,
+      "grad_norm": 0.9323245193356091,
+      "learning_rate": 0.003,
+      "loss": 4.0724,
+      "step": 5490
+    },
+    {
+      "epoch": 0.05491,
+      "grad_norm": 0.9691425932975921,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 5491
+    },
+    {
+      "epoch": 0.05492,
+      "grad_norm": 1.0404125399983395,
+      "learning_rate": 0.003,
+      "loss": 4.092,
+      "step": 5492
+    },
+    {
+      "epoch": 0.05493,
+      "grad_norm": 0.8797071139515741,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 5493
+    },
+    {
+      "epoch": 0.05494,
+      "grad_norm": 0.9257700859047452,
+      "learning_rate": 0.003,
+      "loss": 4.0842,
+      "step": 5494
+    },
+    {
+      "epoch": 0.05495,
+      "grad_norm": 0.837046815044842,
+      "learning_rate": 0.003,
+      "loss": 4.0673,
+      "step": 5495
+    },
+    {
+      "epoch": 0.05496,
+      "grad_norm": 0.868795138162349,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 5496
+    },
+    {
+      "epoch": 0.05497,
+      "grad_norm": 1.1666053988158387,
+      "learning_rate": 0.003,
+      "loss": 4.0877,
+      "step": 5497
+    },
+    {
+      "epoch": 0.05498,
+      "grad_norm": 0.9182958212992614,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 5498
+    },
+    {
+      "epoch": 0.05499,
+      "grad_norm": 0.8243177544407708,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 5499
+    },
+    {
+      "epoch": 0.055,
+      "grad_norm": 0.930633426581144,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 5500
+    },
+    {
+      "epoch": 0.05501,
+      "grad_norm": 0.9853106164483082,
+      "learning_rate": 0.003,
+      "loss": 4.0665,
+      "step": 5501
+    },
+    {
+      "epoch": 0.05502,
+      "grad_norm": 1.002532887849376,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 5502
+    },
+    {
+      "epoch": 0.05503,
+      "grad_norm": 1.134728987679396,
+      "learning_rate": 0.003,
+      "loss": 4.0643,
+      "step": 5503
+    },
+    {
+      "epoch": 0.05504,
+      "grad_norm": 0.8688543177042416,
+      "learning_rate": 0.003,
+      "loss": 4.1085,
+      "step": 5504
+    },
+    {
+      "epoch": 0.05505,
+      "grad_norm": 0.8885019557224482,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 5505
+    },
+    {
+      "epoch": 0.05506,
+      "grad_norm": 1.0191696763985227,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 5506
+    },
+    {
+      "epoch": 0.05507,
+      "grad_norm": 0.9581989274432549,
+      "learning_rate": 0.003,
+      "loss": 4.0946,
+      "step": 5507
+    },
+    {
+      "epoch": 0.05508,
+      "grad_norm": 1.2059958374753366,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 5508
+    },
+    {
+      "epoch": 0.05509,
+      "grad_norm": 1.0509079701116766,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 5509
+    },
+    {
+      "epoch": 0.0551,
+      "grad_norm": 0.9629459111896204,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 5510
+    },
+    {
+      "epoch": 0.05511,
+      "grad_norm": 0.9385152557998871,
+      "learning_rate": 0.003,
+      "loss": 4.0771,
+      "step": 5511
+    },
+    {
+      "epoch": 0.05512,
+      "grad_norm": 0.9992228341288117,
+      "learning_rate": 0.003,
+      "loss": 4.0826,
+      "step": 5512
+    },
+    {
+      "epoch": 0.05513,
+      "grad_norm": 0.9076413869206522,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 5513
+    },
+    {
+      "epoch": 0.05514,
+      "grad_norm": 0.9454063730057051,
+      "learning_rate": 0.003,
+      "loss": 4.0752,
+      "step": 5514
+    },
+    {
+      "epoch": 0.05515,
+      "grad_norm": 0.9333899274103106,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 5515
+    },
+    {
+      "epoch": 0.05516,
+      "grad_norm": 0.9128223147263614,
+      "learning_rate": 0.003,
+      "loss": 4.0914,
+      "step": 5516
+    },
+    {
+      "epoch": 0.05517,
+      "grad_norm": 0.9501252498361874,
+      "learning_rate": 0.003,
+      "loss": 4.0529,
+      "step": 5517
+    },
+    {
+      "epoch": 0.05518,
+      "grad_norm": 0.8979581497014404,
+      "learning_rate": 0.003,
+      "loss": 4.0721,
+      "step": 5518
+    },
+    {
+      "epoch": 0.05519,
+      "grad_norm": 0.8443581212066782,
+      "learning_rate": 0.003,
+      "loss": 4.0975,
+      "step": 5519
+    },
+    {
+      "epoch": 0.0552,
+      "grad_norm": 0.8902006864056781,
+      "learning_rate": 0.003,
+      "loss": 4.0685,
+      "step": 5520
+    },
+    {
+      "epoch": 0.05521,
+      "grad_norm": 1.087402109927747,
+      "learning_rate": 0.003,
+      "loss": 4.0883,
+      "step": 5521
+    },
+    {
+      "epoch": 0.05522,
+      "grad_norm": 1.0502403491974475,
+      "learning_rate": 0.003,
+      "loss": 4.1026,
+      "step": 5522
+    },
+    {
+      "epoch": 0.05523,
+      "grad_norm": 1.0452410374132677,
+      "learning_rate": 0.003,
+      "loss": 4.0777,
+      "step": 5523
+    },
+    {
+      "epoch": 0.05524,
+      "grad_norm": 0.9514484278835924,
+      "learning_rate": 0.003,
+      "loss": 4.0862,
+      "step": 5524
+    },
+    {
+      "epoch": 0.05525,
+      "grad_norm": 1.0272241045462294,
+      "learning_rate": 0.003,
+      "loss": 4.0567,
+      "step": 5525
+    },
+    {
+      "epoch": 0.05526,
+      "grad_norm": 0.8733718754810375,
+      "learning_rate": 0.003,
+      "loss": 4.0598,
+      "step": 5526
+    },
+    {
+      "epoch": 0.05527,
+      "grad_norm": 0.8731879871732975,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 5527
+    },
+    {
+      "epoch": 0.05528,
+      "grad_norm": 1.1037182994286576,
+      "learning_rate": 0.003,
+      "loss": 4.0739,
+      "step": 5528
+    },
+    {
+      "epoch": 0.05529,
+      "grad_norm": 1.0027208430036605,
+      "learning_rate": 0.003,
+      "loss": 4.0852,
+      "step": 5529
+    },
+    {
+      "epoch": 0.0553,
+      "grad_norm": 1.035023109428945,
+      "learning_rate": 0.003,
+      "loss": 4.0996,
+      "step": 5530
+    },
+    {
+      "epoch": 0.05531,
+      "grad_norm": 0.9462736114003235,
+      "learning_rate": 0.003,
+      "loss": 4.0845,
+      "step": 5531
+    },
+    {
+      "epoch": 0.05532,
+      "grad_norm": 1.1792902449799267,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 5532
+    },
+    {
+      "epoch": 0.05533,
+      "grad_norm": 1.0162972400663883,
+      "learning_rate": 0.003,
+      "loss": 4.0923,
+      "step": 5533
+    },
+    {
+      "epoch": 0.05534,
+      "grad_norm": 1.1486180781871198,
+      "learning_rate": 0.003,
+      "loss": 4.0833,
+      "step": 5534
+    },
+    {
+      "epoch": 0.05535,
+      "grad_norm": 1.0752582119191083,
+      "learning_rate": 0.003,
+      "loss": 4.0953,
+      "step": 5535
+    },
+    {
+      "epoch": 0.05536,
+      "grad_norm": 0.9581234101005884,
+      "learning_rate": 0.003,
+      "loss": 4.0797,
+      "step": 5536
+    },
+    {
+      "epoch": 0.05537,
+      "grad_norm": 0.8208048349609377,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 5537
+    },
+    {
+      "epoch": 0.05538,
+      "grad_norm": 0.9473770252339098,
+      "learning_rate": 0.003,
+      "loss": 4.0707,
+      "step": 5538
+    },
+    {
+      "epoch": 0.05539,
+      "grad_norm": 1.2702509072007524,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 5539
+    },
+    {
+      "epoch": 0.0554,
+      "grad_norm": 0.9017522335362036,
+      "learning_rate": 0.003,
+      "loss": 4.1166,
+      "step": 5540
+    },
+    {
+      "epoch": 0.05541,
+      "grad_norm": 0.8359582648956668,
+      "learning_rate": 0.003,
+      "loss": 4.0888,
+      "step": 5541
+    },
+    {
+      "epoch": 0.05542,
+      "grad_norm": 0.9218480107385333,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 5542
+    },
+    {
+      "epoch": 0.05543,
+      "grad_norm": 0.8696347559650821,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 5543
+    },
+    {
+      "epoch": 0.05544,
+      "grad_norm": 1.055625206756629,
+      "learning_rate": 0.003,
+      "loss": 4.1022,
+      "step": 5544
+    },
+    {
+      "epoch": 0.05545,
+      "grad_norm": 1.1728733370559363,
+      "learning_rate": 0.003,
+      "loss": 4.0957,
+      "step": 5545
+    },
+    {
+      "epoch": 0.05546,
+      "grad_norm": 0.7788999498236301,
+      "learning_rate": 0.003,
+      "loss": 4.0702,
+      "step": 5546
+    },
+    {
+      "epoch": 0.05547,
+      "grad_norm": 0.8311570515952018,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 5547
+    },
+    {
+      "epoch": 0.05548,
+      "grad_norm": 0.9335591947026343,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 5548
+    },
+    {
+      "epoch": 0.05549,
+      "grad_norm": 1.0573495296822653,
+      "learning_rate": 0.003,
+      "loss": 4.0933,
+      "step": 5549
+    },
+    {
+      "epoch": 0.0555,
+      "grad_norm": 1.0502011204824901,
+      "learning_rate": 0.003,
+      "loss": 4.0719,
+      "step": 5550
+    },
+    {
+      "epoch": 0.05551,
+      "grad_norm": 0.9831951258747678,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 5551
+    },
+    {
+      "epoch": 0.05552,
+      "grad_norm": 1.1313502639381925,
+      "learning_rate": 0.003,
+      "loss": 4.0597,
+      "step": 5552
+    },
+    {
+      "epoch": 0.05553,
+      "grad_norm": 0.8713935309218466,
+      "learning_rate": 0.003,
+      "loss": 4.0751,
+      "step": 5553
+    },
+    {
+      "epoch": 0.05554,
+      "grad_norm": 0.9135700599486382,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 5554
+    },
+    {
+      "epoch": 0.05555,
+      "grad_norm": 0.8386399502596644,
+      "learning_rate": 0.003,
+      "loss": 4.1019,
+      "step": 5555
+    },
+    {
+      "epoch": 0.05556,
+      "grad_norm": 0.8427182198646174,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 5556
+    },
+    {
+      "epoch": 0.05557,
+      "grad_norm": 0.8853744024878168,
+      "learning_rate": 0.003,
+      "loss": 4.0796,
+      "step": 5557
+    },
+    {
+      "epoch": 0.05558,
+      "grad_norm": 0.8682691991197287,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 5558
+    },
+    {
+      "epoch": 0.05559,
+      "grad_norm": 0.9168284316303011,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 5559
+    },
+    {
+      "epoch": 0.0556,
+      "grad_norm": 0.8495910068211255,
+      "learning_rate": 0.003,
+      "loss": 4.088,
+      "step": 5560
+    },
+    {
+      "epoch": 0.05561,
+      "grad_norm": 0.8580860935781918,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 5561
+    },
+    {
+      "epoch": 0.05562,
+      "grad_norm": 0.8995255593141265,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 5562
+    },
+    {
+      "epoch": 0.05563,
+      "grad_norm": 0.9397411181545186,
+      "learning_rate": 0.003,
+      "loss": 4.067,
+      "step": 5563
+    },
+    {
+      "epoch": 0.05564,
+      "grad_norm": 0.9961124788426614,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 5564
+    },
+    {
+      "epoch": 0.05565,
+      "grad_norm": 1.017582443976892,
+      "learning_rate": 0.003,
+      "loss": 4.0949,
+      "step": 5565
+    },
+    {
+      "epoch": 0.05566,
+      "grad_norm": 0.9196943277209378,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 5566
+    },
+    {
+      "epoch": 0.05567,
+      "grad_norm": 0.887778482100592,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 5567
+    },
+    {
+      "epoch": 0.05568,
+      "grad_norm": 0.9446867885667367,
+      "learning_rate": 0.003,
+      "loss": 4.0839,
+      "step": 5568
+    },
+    {
+      "epoch": 0.05569,
+      "grad_norm": 1.2048355489642966,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 5569
+    },
+    {
+      "epoch": 0.0557,
+      "grad_norm": 0.9947727024541948,
+      "learning_rate": 0.003,
+      "loss": 4.0846,
+      "step": 5570
+    },
+    {
+      "epoch": 0.05571,
+      "grad_norm": 0.9242279105036012,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 5571
+    },
+    {
+      "epoch": 0.05572,
+      "grad_norm": 0.8718726616570441,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 5572
+    },
+    {
+      "epoch": 0.05573,
+      "grad_norm": 0.9988632134064598,
+      "learning_rate": 0.003,
+      "loss": 4.0917,
+      "step": 5573
+    },
+    {
+      "epoch": 0.05574,
+      "grad_norm": 1.0858108953319112,
+      "learning_rate": 0.003,
+      "loss": 4.067,
+      "step": 5574
+    },
+    {
+      "epoch": 0.05575,
+      "grad_norm": 0.89341861731438,
+      "learning_rate": 0.003,
+      "loss": 4.0599,
+      "step": 5575
+    },
+    {
+      "epoch": 0.05576,
+      "grad_norm": 0.9927093004321265,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 5576
+    },
+    {
+      "epoch": 0.05577,
+      "grad_norm": 1.0561976942629276,
+      "learning_rate": 0.003,
+      "loss": 4.0644,
+      "step": 5577
+    },
+    {
+      "epoch": 0.05578,
+      "grad_norm": 0.9226854378392683,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 5578
+    },
+    {
+      "epoch": 0.05579,
+      "grad_norm": 0.9660492552781841,
+      "learning_rate": 0.003,
+      "loss": 4.0787,
+      "step": 5579
+    },
+    {
+      "epoch": 0.0558,
+      "grad_norm": 1.0219080757366508,
+      "learning_rate": 0.003,
+      "loss": 4.0709,
+      "step": 5580
+    },
+    {
+      "epoch": 0.05581,
+      "grad_norm": 0.9262442274506089,
+      "learning_rate": 0.003,
+      "loss": 4.084,
+      "step": 5581
+    },
+    {
+      "epoch": 0.05582,
+      "grad_norm": 0.9589806371639085,
+      "learning_rate": 0.003,
+      "loss": 4.096,
+      "step": 5582
+    },
+    {
+      "epoch": 0.05583,
+      "grad_norm": 1.0645509578183838,
+      "learning_rate": 0.003,
+      "loss": 4.0734,
+      "step": 5583
+    },
+    {
+      "epoch": 0.05584,
+      "grad_norm": 1.051226914948146,
+      "learning_rate": 0.003,
+      "loss": 4.0703,
+      "step": 5584
+    },
+    {
+      "epoch": 0.05585,
+      "grad_norm": 1.0467720353310674,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 5585
+    },
+    {
+      "epoch": 0.05586,
+      "grad_norm": 0.9663212590556503,
+      "learning_rate": 0.003,
+      "loss": 4.0829,
+      "step": 5586
+    },
+    {
+      "epoch": 0.05587,
+      "grad_norm": 0.9185379338813185,
+      "learning_rate": 0.003,
+      "loss": 4.0844,
+      "step": 5587
+    },
+    {
+      "epoch": 0.05588,
+      "grad_norm": 0.9694934031853484,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 5588
+    },
+    {
+      "epoch": 0.05589,
+      "grad_norm": 1.0526451937838062,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 5589
+    },
+    {
+      "epoch": 0.0559,
+      "grad_norm": 1.1085070368071448,
+      "learning_rate": 0.003,
+      "loss": 4.0737,
+      "step": 5590
+    },
+    {
+      "epoch": 0.05591,
+      "grad_norm": 1.0162203009874562,
+      "learning_rate": 0.003,
+      "loss": 4.079,
+      "step": 5591
+    },
+    {
+      "epoch": 0.05592,
+      "grad_norm": 0.951694969481166,
+      "learning_rate": 0.003,
+      "loss": 4.0804,
+      "step": 5592
+    },
+    {
+      "epoch": 0.05593,
+      "grad_norm": 0.8175074193854779,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 5593
+    },
+    {
+      "epoch": 0.05594,
+      "grad_norm": 0.9255694773963952,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 5594
+    },
+    {
+      "epoch": 0.05595,
+      "grad_norm": 1.0156609990647611,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 5595
+    },
+    {
+      "epoch": 0.05596,
+      "grad_norm": 0.932955278279892,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 5596
+    },
+    {
+      "epoch": 0.05597,
+      "grad_norm": 0.920908250325548,
+      "learning_rate": 0.003,
+      "loss": 4.0877,
+      "step": 5597
+    },
+    {
+      "epoch": 0.05598,
+      "grad_norm": 1.0626157594613586,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 5598
+    },
+    {
+      "epoch": 0.05599,
+      "grad_norm": 0.9591722885772912,
+      "learning_rate": 0.003,
+      "loss": 4.0951,
+      "step": 5599
+    },
+    {
+      "epoch": 0.056,
+      "grad_norm": 0.8972051686796552,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 5600
+    },
+    {
+      "epoch": 0.05601,
+      "grad_norm": 0.94039167026587,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 5601
+    },
+    {
+      "epoch": 0.05602,
+      "grad_norm": 0.8789138951678084,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 5602
+    },
+    {
+      "epoch": 0.05603,
+      "grad_norm": 1.0980048694137343,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 5603
+    },
+    {
+      "epoch": 0.05604,
+      "grad_norm": 1.132678130199623,
+      "learning_rate": 0.003,
+      "loss": 4.1005,
+      "step": 5604
+    },
+    {
+      "epoch": 0.05605,
+      "grad_norm": 0.9376010958362765,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 5605
+    },
+    {
+      "epoch": 0.05606,
+      "grad_norm": 1.012601875884022,
+      "learning_rate": 0.003,
+      "loss": 4.0823,
+      "step": 5606
+    },
+    {
+      "epoch": 0.05607,
+      "grad_norm": 1.078658031552116,
+      "learning_rate": 0.003,
+      "loss": 4.1016,
+      "step": 5607
+    },
+    {
+      "epoch": 0.05608,
+      "grad_norm": 1.0104480034658165,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 5608
+    },
+    {
+      "epoch": 0.05609,
+      "grad_norm": 1.1587851237585396,
+      "learning_rate": 0.003,
+      "loss": 4.0737,
+      "step": 5609
+    },
+    {
+      "epoch": 0.0561,
+      "grad_norm": 0.9960041589297254,
+      "learning_rate": 0.003,
+      "loss": 4.1041,
+      "step": 5610
+    },
+    {
+      "epoch": 0.05611,
+      "grad_norm": 0.9361590301922911,
+      "learning_rate": 0.003,
+      "loss": 4.072,
+      "step": 5611
+    },
+    {
+      "epoch": 0.05612,
+      "grad_norm": 0.9764370565765741,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 5612
+    },
+    {
+      "epoch": 0.05613,
+      "grad_norm": 1.0014577790700079,
+      "learning_rate": 0.003,
+      "loss": 4.0891,
+      "step": 5613
+    },
+    {
+      "epoch": 0.05614,
+      "grad_norm": 0.9831216849472494,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 5614
+    },
+    {
+      "epoch": 0.05615,
+      "grad_norm": 1.1586083324328103,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 5615
+    },
+    {
+      "epoch": 0.05616,
+      "grad_norm": 0.8890921040375313,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 5616
+    },
+    {
+      "epoch": 0.05617,
+      "grad_norm": 0.7957949051675782,
+      "learning_rate": 0.003,
+      "loss": 4.0774,
+      "step": 5617
+    },
+    {
+      "epoch": 0.05618,
+      "grad_norm": 0.7215932182196926,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 5618
+    },
+    {
+      "epoch": 0.05619,
+      "grad_norm": 0.7741180783292809,
+      "learning_rate": 0.003,
+      "loss": 4.1083,
+      "step": 5619
+    },
+    {
+      "epoch": 0.0562,
+      "grad_norm": 0.7511553658323501,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 5620
+    },
+    {
+      "epoch": 0.05621,
+      "grad_norm": 0.6396239807569799,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 5621
+    },
+    {
+      "epoch": 0.05622,
+      "grad_norm": 0.6074326667753387,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 5622
+    },
+    {
+      "epoch": 0.05623,
+      "grad_norm": 0.6362168788902988,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 5623
+    },
+    {
+      "epoch": 0.05624,
+      "grad_norm": 0.7164948632194134,
+      "learning_rate": 0.003,
+      "loss": 4.0905,
+      "step": 5624
+    },
+    {
+      "epoch": 0.05625,
+      "grad_norm": 0.7858917280239912,
+      "learning_rate": 0.003,
+      "loss": 4.084,
+      "step": 5625
+    },
+    {
+      "epoch": 0.05626,
+      "grad_norm": 0.9610068641748187,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 5626
+    },
+    {
+      "epoch": 0.05627,
+      "grad_norm": 1.0076786147054788,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 5627
+    },
+    {
+      "epoch": 0.05628,
+      "grad_norm": 1.1184221088760653,
+      "learning_rate": 0.003,
+      "loss": 4.0685,
+      "step": 5628
+    },
+    {
+      "epoch": 0.05629,
+      "grad_norm": 1.009005087989754,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 5629
+    },
+    {
+      "epoch": 0.0563,
+      "grad_norm": 0.9675845365105762,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 5630
+    },
+    {
+      "epoch": 0.05631,
+      "grad_norm": 0.9722083145337348,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 5631
+    },
+    {
+      "epoch": 0.05632,
+      "grad_norm": 0.9663539092020825,
+      "learning_rate": 0.003,
+      "loss": 4.0739,
+      "step": 5632
+    },
+    {
+      "epoch": 0.05633,
+      "grad_norm": 0.9758861223474846,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 5633
+    },
+    {
+      "epoch": 0.05634,
+      "grad_norm": 1.1099380799726346,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 5634
+    },
+    {
+      "epoch": 0.05635,
+      "grad_norm": 0.9793289159407238,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 5635
+    },
+    {
+      "epoch": 0.05636,
+      "grad_norm": 1.0928319649450002,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 5636
+    },
+    {
+      "epoch": 0.05637,
+      "grad_norm": 0.9411278378658644,
+      "learning_rate": 0.003,
+      "loss": 4.1044,
+      "step": 5637
+    },
+    {
+      "epoch": 0.05638,
+      "grad_norm": 1.006716505320645,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 5638
+    },
+    {
+      "epoch": 0.05639,
+      "grad_norm": 1.140827528278389,
+      "learning_rate": 0.003,
+      "loss": 4.076,
+      "step": 5639
+    },
+    {
+      "epoch": 0.0564,
+      "grad_norm": 1.1625088389769833,
+      "learning_rate": 0.003,
+      "loss": 4.0816,
+      "step": 5640
+    },
+    {
+      "epoch": 0.05641,
+      "grad_norm": 0.9893427923995295,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 5641
+    },
+    {
+      "epoch": 0.05642,
+      "grad_norm": 0.9716727488546525,
+      "learning_rate": 0.003,
+      "loss": 4.0966,
+      "step": 5642
+    },
+    {
+      "epoch": 0.05643,
+      "grad_norm": 0.8388359388426512,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 5643
+    },
+    {
+      "epoch": 0.05644,
+      "grad_norm": 0.8864776479628189,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 5644
+    },
+    {
+      "epoch": 0.05645,
+      "grad_norm": 1.0289020783924894,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 5645
+    },
+    {
+      "epoch": 0.05646,
+      "grad_norm": 1.1004794764123877,
+      "learning_rate": 0.003,
+      "loss": 4.0709,
+      "step": 5646
+    },
+    {
+      "epoch": 0.05647,
+      "grad_norm": 0.9547139445319417,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 5647
+    },
+    {
+      "epoch": 0.05648,
+      "grad_norm": 0.967189977084837,
+      "learning_rate": 0.003,
+      "loss": 4.0816,
+      "step": 5648
+    },
+    {
+      "epoch": 0.05649,
+      "grad_norm": 0.973718841402612,
+      "learning_rate": 0.003,
+      "loss": 4.0719,
+      "step": 5649
+    },
+    {
+      "epoch": 0.0565,
+      "grad_norm": 1.02998820647941,
+      "learning_rate": 0.003,
+      "loss": 4.0905,
+      "step": 5650
+    },
+    {
+      "epoch": 0.05651,
+      "grad_norm": 0.8716926811029921,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 5651
+    },
+    {
+      "epoch": 0.05652,
+      "grad_norm": 0.8882622031970813,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 5652
+    },
+    {
+      "epoch": 0.05653,
+      "grad_norm": 0.8791179996457095,
+      "learning_rate": 0.003,
+      "loss": 4.077,
+      "step": 5653
+    },
+    {
+      "epoch": 0.05654,
+      "grad_norm": 0.9428467074127278,
+      "learning_rate": 0.003,
+      "loss": 4.0866,
+      "step": 5654
+    },
+    {
+      "epoch": 0.05655,
+      "grad_norm": 0.9168230086253641,
+      "learning_rate": 0.003,
+      "loss": 4.0776,
+      "step": 5655
+    },
+    {
+      "epoch": 0.05656,
+      "grad_norm": 0.9563246120783195,
+      "learning_rate": 0.003,
+      "loss": 4.0765,
+      "step": 5656
+    },
+    {
+      "epoch": 0.05657,
+      "grad_norm": 1.0720010551454098,
+      "learning_rate": 0.003,
+      "loss": 4.0778,
+      "step": 5657
+    },
+    {
+      "epoch": 0.05658,
+      "grad_norm": 0.8856156383346027,
+      "learning_rate": 0.003,
+      "loss": 4.0643,
+      "step": 5658
+    },
+    {
+      "epoch": 0.05659,
+      "grad_norm": 0.8368418786449191,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 5659
+    },
+    {
+      "epoch": 0.0566,
+      "grad_norm": 0.8374298075537798,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 5660
+    },
+    {
+      "epoch": 0.05661,
+      "grad_norm": 0.9364487912430249,
+      "learning_rate": 0.003,
+      "loss": 4.1059,
+      "step": 5661
+    },
+    {
+      "epoch": 0.05662,
+      "grad_norm": 1.000052264217599,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 5662
+    },
+    {
+      "epoch": 0.05663,
+      "grad_norm": 1.0446835856273087,
+      "learning_rate": 0.003,
+      "loss": 4.0731,
+      "step": 5663
+    },
+    {
+      "epoch": 0.05664,
+      "grad_norm": 0.9023506375796267,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 5664
+    },
+    {
+      "epoch": 0.05665,
+      "grad_norm": 0.977889227291688,
+      "learning_rate": 0.003,
+      "loss": 4.0567,
+      "step": 5665
+    },
+    {
+      "epoch": 0.05666,
+      "grad_norm": 1.130926840966814,
+      "learning_rate": 0.003,
+      "loss": 4.0754,
+      "step": 5666
+    },
+    {
+      "epoch": 0.05667,
+      "grad_norm": 1.0607816508483603,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 5667
+    },
+    {
+      "epoch": 0.05668,
+      "grad_norm": 0.9792158523254157,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 5668
+    },
+    {
+      "epoch": 0.05669,
+      "grad_norm": 0.9055981740448197,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 5669
+    },
+    {
+      "epoch": 0.0567,
+      "grad_norm": 1.1476920087687061,
+      "learning_rate": 0.003,
+      "loss": 4.0832,
+      "step": 5670
+    },
+    {
+      "epoch": 0.05671,
+      "grad_norm": 0.8960616098490328,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 5671
+    },
+    {
+      "epoch": 0.05672,
+      "grad_norm": 0.99321915369349,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 5672
+    },
+    {
+      "epoch": 0.05673,
+      "grad_norm": 1.0447752766651002,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 5673
+    },
+    {
+      "epoch": 0.05674,
+      "grad_norm": 1.077079251419245,
+      "learning_rate": 0.003,
+      "loss": 4.0823,
+      "step": 5674
+    },
+    {
+      "epoch": 0.05675,
+      "grad_norm": 0.919109346681063,
+      "learning_rate": 0.003,
+      "loss": 4.0671,
+      "step": 5675
+    },
+    {
+      "epoch": 0.05676,
+      "grad_norm": 0.9790547358628968,
+      "learning_rate": 0.003,
+      "loss": 4.0789,
+      "step": 5676
+    },
+    {
+      "epoch": 0.05677,
+      "grad_norm": 1.1793670646509047,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 5677
+    },
+    {
+      "epoch": 0.05678,
+      "grad_norm": 0.7472606166861204,
+      "learning_rate": 0.003,
+      "loss": 4.0619,
+      "step": 5678
+    },
+    {
+      "epoch": 0.05679,
+      "grad_norm": 0.8050508896957376,
+      "learning_rate": 0.003,
+      "loss": 4.0787,
+      "step": 5679
+    },
+    {
+      "epoch": 0.0568,
+      "grad_norm": 0.9344025333558529,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 5680
+    },
+    {
+      "epoch": 0.05681,
+      "grad_norm": 1.2348092828821307,
+      "learning_rate": 0.003,
+      "loss": 4.0683,
+      "step": 5681
+    },
+    {
+      "epoch": 0.05682,
+      "grad_norm": 1.1959381020011484,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 5682
+    },
+    {
+      "epoch": 0.05683,
+      "grad_norm": 0.8976257998161941,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 5683
+    },
+    {
+      "epoch": 0.05684,
+      "grad_norm": 0.7252712065821193,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 5684
+    },
+    {
+      "epoch": 0.05685,
+      "grad_norm": 0.8829690678207763,
+      "learning_rate": 0.003,
+      "loss": 4.0853,
+      "step": 5685
+    },
+    {
+      "epoch": 0.05686,
+      "grad_norm": 1.1058469730277019,
+      "learning_rate": 0.003,
+      "loss": 4.0923,
+      "step": 5686
+    },
+    {
+      "epoch": 0.05687,
+      "grad_norm": 0.9611307746153006,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 5687
+    },
+    {
+      "epoch": 0.05688,
+      "grad_norm": 0.972565534030192,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 5688
+    },
+    {
+      "epoch": 0.05689,
+      "grad_norm": 1.096241549462336,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 5689
+    },
+    {
+      "epoch": 0.0569,
+      "grad_norm": 0.8973939245294639,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 5690
+    },
+    {
+      "epoch": 0.05691,
+      "grad_norm": 1.1419881519139168,
+      "learning_rate": 0.003,
+      "loss": 4.0996,
+      "step": 5691
+    },
+    {
+      "epoch": 0.05692,
+      "grad_norm": 0.9949113970884143,
+      "learning_rate": 0.003,
+      "loss": 4.0696,
+      "step": 5692
+    },
+    {
+      "epoch": 0.05693,
+      "grad_norm": 0.8512031165359729,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 5693
+    },
+    {
+      "epoch": 0.05694,
+      "grad_norm": 0.9426191438665297,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 5694
+    },
+    {
+      "epoch": 0.05695,
+      "grad_norm": 0.9020092132644848,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 5695
+    },
+    {
+      "epoch": 0.05696,
+      "grad_norm": 0.8485145899081942,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 5696
+    },
+    {
+      "epoch": 0.05697,
+      "grad_norm": 0.8561615924770996,
+      "learning_rate": 0.003,
+      "loss": 4.0902,
+      "step": 5697
+    },
+    {
+      "epoch": 0.05698,
+      "grad_norm": 0.8260144341160308,
+      "learning_rate": 0.003,
+      "loss": 4.085,
+      "step": 5698
+    },
+    {
+      "epoch": 0.05699,
+      "grad_norm": 0.966693143570733,
+      "learning_rate": 0.003,
+      "loss": 4.0727,
+      "step": 5699
+    },
+    {
+      "epoch": 0.057,
+      "grad_norm": 1.1487198582029357,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 5700
+    },
+    {
+      "epoch": 0.05701,
+      "grad_norm": 1.0854758797164454,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 5701
+    },
+    {
+      "epoch": 0.05702,
+      "grad_norm": 0.8835018328849502,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 5702
+    },
+    {
+      "epoch": 0.05703,
+      "grad_norm": 0.7845742412565407,
+      "learning_rate": 0.003,
+      "loss": 4.0972,
+      "step": 5703
+    },
+    {
+      "epoch": 0.05704,
+      "grad_norm": 0.8498642388489823,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 5704
+    },
+    {
+      "epoch": 0.05705,
+      "grad_norm": 1.1326574547883799,
+      "learning_rate": 0.003,
+      "loss": 4.0631,
+      "step": 5705
+    },
+    {
+      "epoch": 0.05706,
+      "grad_norm": 1.1434161313327718,
+      "learning_rate": 0.003,
+      "loss": 4.0632,
+      "step": 5706
+    },
+    {
+      "epoch": 0.05707,
+      "grad_norm": 0.8510990859187535,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 5707
+    },
+    {
+      "epoch": 0.05708,
+      "grad_norm": 0.8230771160938593,
+      "learning_rate": 0.003,
+      "loss": 4.0924,
+      "step": 5708
+    },
+    {
+      "epoch": 0.05709,
+      "grad_norm": 0.8626308606823649,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 5709
+    },
+    {
+      "epoch": 0.0571,
+      "grad_norm": 0.8749237183120739,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 5710
+    },
+    {
+      "epoch": 0.05711,
+      "grad_norm": 1.0435884220325964,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 5711
+    },
+    {
+      "epoch": 0.05712,
+      "grad_norm": 1.1753101650891566,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 5712
+    },
+    {
+      "epoch": 0.05713,
+      "grad_norm": 0.9392540388426064,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 5713
+    },
+    {
+      "epoch": 0.05714,
+      "grad_norm": 1.1219818951823304,
+      "learning_rate": 0.003,
+      "loss": 4.1143,
+      "step": 5714
+    },
+    {
+      "epoch": 0.05715,
+      "grad_norm": 0.9027198476231381,
+      "learning_rate": 0.003,
+      "loss": 4.1093,
+      "step": 5715
+    },
+    {
+      "epoch": 0.05716,
+      "grad_norm": 0.9350712749494232,
+      "learning_rate": 0.003,
+      "loss": 4.0788,
+      "step": 5716
+    },
+    {
+      "epoch": 0.05717,
+      "grad_norm": 1.078949022904377,
+      "learning_rate": 0.003,
+      "loss": 4.0947,
+      "step": 5717
+    },
+    {
+      "epoch": 0.05718,
+      "grad_norm": 0.9320114628230994,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 5718
+    },
+    {
+      "epoch": 0.05719,
+      "grad_norm": 1.0737725128729791,
+      "learning_rate": 0.003,
+      "loss": 4.0798,
+      "step": 5719
+    },
+    {
+      "epoch": 0.0572,
+      "grad_norm": 1.4370901793089597,
+      "learning_rate": 0.003,
+      "loss": 4.0809,
+      "step": 5720
+    },
+    {
+      "epoch": 0.05721,
+      "grad_norm": 0.6835699725834176,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 5721
+    },
+    {
+      "epoch": 0.05722,
+      "grad_norm": 0.9019947466995998,
+      "learning_rate": 0.003,
+      "loss": 4.0632,
+      "step": 5722
+    },
+    {
+      "epoch": 0.05723,
+      "grad_norm": 0.9478261688039891,
+      "learning_rate": 0.003,
+      "loss": 4.0664,
+      "step": 5723
+    },
+    {
+      "epoch": 0.05724,
+      "grad_norm": 0.9063450900670779,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 5724
+    },
+    {
+      "epoch": 0.05725,
+      "grad_norm": 0.8956563510530542,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 5725
+    },
+    {
+      "epoch": 0.05726,
+      "grad_norm": 0.944931440902404,
+      "learning_rate": 0.003,
+      "loss": 4.0655,
+      "step": 5726
+    },
+    {
+      "epoch": 0.05727,
+      "grad_norm": 0.9785206525274993,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 5727
+    },
+    {
+      "epoch": 0.05728,
+      "grad_norm": 0.9750029413661264,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 5728
+    },
+    {
+      "epoch": 0.05729,
+      "grad_norm": 0.9458877993555904,
+      "learning_rate": 0.003,
+      "loss": 4.0644,
+      "step": 5729
+    },
+    {
+      "epoch": 0.0573,
+      "grad_norm": 0.8220018329351471,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 5730
+    },
+    {
+      "epoch": 0.05731,
+      "grad_norm": 0.8935219092820262,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 5731
+    },
+    {
+      "epoch": 0.05732,
+      "grad_norm": 1.011416798805237,
+      "learning_rate": 0.003,
+      "loss": 4.083,
+      "step": 5732
+    },
+    {
+      "epoch": 0.05733,
+      "grad_norm": 1.191217640923215,
+      "learning_rate": 0.003,
+      "loss": 4.0948,
+      "step": 5733
+    },
+    {
+      "epoch": 0.05734,
+      "grad_norm": 0.9117001751188784,
+      "learning_rate": 0.003,
+      "loss": 4.087,
+      "step": 5734
+    },
+    {
+      "epoch": 0.05735,
+      "grad_norm": 0.7434085878525878,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 5735
+    },
+    {
+      "epoch": 0.05736,
+      "grad_norm": 0.7886018670166627,
+      "learning_rate": 0.003,
+      "loss": 4.0733,
+      "step": 5736
+    },
+    {
+      "epoch": 0.05737,
+      "grad_norm": 0.8276223745361921,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 5737
+    },
+    {
+      "epoch": 0.05738,
+      "grad_norm": 0.9579259513547983,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 5738
+    },
+    {
+      "epoch": 0.05739,
+      "grad_norm": 0.9985511509251391,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 5739
+    },
+    {
+      "epoch": 0.0574,
+      "grad_norm": 0.9372357444408635,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 5740
+    },
+    {
+      "epoch": 0.05741,
+      "grad_norm": 1.037244120040037,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 5741
+    },
+    {
+      "epoch": 0.05742,
+      "grad_norm": 1.0925704013324562,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 5742
+    },
+    {
+      "epoch": 0.05743,
+      "grad_norm": 0.9822101766913746,
+      "learning_rate": 0.003,
+      "loss": 4.0591,
+      "step": 5743
+    },
+    {
+      "epoch": 0.05744,
+      "grad_norm": 0.9415350376527191,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 5744
+    },
+    {
+      "epoch": 0.05745,
+      "grad_norm": 0.8514513969086831,
+      "learning_rate": 0.003,
+      "loss": 4.0658,
+      "step": 5745
+    },
+    {
+      "epoch": 0.05746,
+      "grad_norm": 0.9590813051602415,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 5746
+    },
+    {
+      "epoch": 0.05747,
+      "grad_norm": 1.033811045759274,
+      "learning_rate": 0.003,
+      "loss": 4.0826,
+      "step": 5747
+    },
+    {
+      "epoch": 0.05748,
+      "grad_norm": 0.9881694198935187,
+      "learning_rate": 0.003,
+      "loss": 4.0998,
+      "step": 5748
+    },
+    {
+      "epoch": 0.05749,
+      "grad_norm": 1.2197505780445184,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 5749
+    },
+    {
+      "epoch": 0.0575,
+      "grad_norm": 0.8960825057299955,
+      "learning_rate": 0.003,
+      "loss": 4.0731,
+      "step": 5750
+    },
+    {
+      "epoch": 0.05751,
+      "grad_norm": 0.9380756538662388,
+      "learning_rate": 0.003,
+      "loss": 4.09,
+      "step": 5751
+    },
+    {
+      "epoch": 0.05752,
+      "grad_norm": 1.0494754059465994,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 5752
+    },
+    {
+      "epoch": 0.05753,
+      "grad_norm": 0.9737106865451927,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 5753
+    },
+    {
+      "epoch": 0.05754,
+      "grad_norm": 1.14044841129864,
+      "learning_rate": 0.003,
+      "loss": 4.0745,
+      "step": 5754
+    },
+    {
+      "epoch": 0.05755,
+      "grad_norm": 0.9859781548984715,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 5755
+    },
+    {
+      "epoch": 0.05756,
+      "grad_norm": 1.1440582623675561,
+      "learning_rate": 0.003,
+      "loss": 4.0929,
+      "step": 5756
+    },
+    {
+      "epoch": 0.05757,
+      "grad_norm": 1.0784582404787597,
+      "learning_rate": 0.003,
+      "loss": 4.0793,
+      "step": 5757
+    },
+    {
+      "epoch": 0.05758,
+      "grad_norm": 1.0238978196194495,
+      "learning_rate": 0.003,
+      "loss": 4.0714,
+      "step": 5758
+    },
+    {
+      "epoch": 0.05759,
+      "grad_norm": 1.1962555833625972,
+      "learning_rate": 0.003,
+      "loss": 4.0785,
+      "step": 5759
+    },
+    {
+      "epoch": 0.0576,
+      "grad_norm": 0.8606928693615246,
+      "learning_rate": 0.003,
+      "loss": 4.0673,
+      "step": 5760
+    },
+    {
+      "epoch": 0.05761,
+      "grad_norm": 0.8065313954609131,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 5761
+    },
+    {
+      "epoch": 0.05762,
+      "grad_norm": 1.006617466336666,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 5762
+    },
+    {
+      "epoch": 0.05763,
+      "grad_norm": 1.1736455692173053,
+      "learning_rate": 0.003,
+      "loss": 4.0705,
+      "step": 5763
+    },
+    {
+      "epoch": 0.05764,
+      "grad_norm": 1.0156358262996825,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 5764
+    },
+    {
+      "epoch": 0.05765,
+      "grad_norm": 1.1676317429304073,
+      "learning_rate": 0.003,
+      "loss": 4.0792,
+      "step": 5765
+    },
+    {
+      "epoch": 0.05766,
+      "grad_norm": 0.9296797123737459,
+      "learning_rate": 0.003,
+      "loss": 4.0741,
+      "step": 5766
+    },
+    {
+      "epoch": 0.05767,
+      "grad_norm": 1.108411398313796,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 5767
+    },
+    {
+      "epoch": 0.05768,
+      "grad_norm": 0.8873418356321205,
+      "learning_rate": 0.003,
+      "loss": 4.0824,
+      "step": 5768
+    },
+    {
+      "epoch": 0.05769,
+      "grad_norm": 0.8943391072499615,
+      "learning_rate": 0.003,
+      "loss": 4.0656,
+      "step": 5769
+    },
+    {
+      "epoch": 0.0577,
+      "grad_norm": 0.7643635046640151,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 5770
+    },
+    {
+      "epoch": 0.05771,
+      "grad_norm": 0.8171732047022162,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 5771
+    },
+    {
+      "epoch": 0.05772,
+      "grad_norm": 0.9195825274621192,
+      "learning_rate": 0.003,
+      "loss": 4.0851,
+      "step": 5772
+    },
+    {
+      "epoch": 0.05773,
+      "grad_norm": 1.1643144465255408,
+      "learning_rate": 0.003,
+      "loss": 4.0861,
+      "step": 5773
+    },
+    {
+      "epoch": 0.05774,
+      "grad_norm": 1.0670738905589834,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 5774
+    },
+    {
+      "epoch": 0.05775,
+      "grad_norm": 1.1379995423607696,
+      "learning_rate": 0.003,
+      "loss": 4.0635,
+      "step": 5775
+    },
+    {
+      "epoch": 0.05776,
+      "grad_norm": 1.0629804758835848,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 5776
+    },
+    {
+      "epoch": 0.05777,
+      "grad_norm": 0.9806929321830686,
+      "learning_rate": 0.003,
+      "loss": 4.075,
+      "step": 5777
+    },
+    {
+      "epoch": 0.05778,
+      "grad_norm": 1.0104200584581524,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 5778
+    },
+    {
+      "epoch": 0.05779,
+      "grad_norm": 1.0501375318209158,
+      "learning_rate": 0.003,
+      "loss": 4.0847,
+      "step": 5779
+    },
+    {
+      "epoch": 0.0578,
+      "grad_norm": 1.0936198888153104,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 5780
+    },
+    {
+      "epoch": 0.05781,
+      "grad_norm": 1.0599398702570635,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 5781
+    },
+    {
+      "epoch": 0.05782,
+      "grad_norm": 0.8124457081867787,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 5782
+    },
+    {
+      "epoch": 0.05783,
+      "grad_norm": 0.8785108285243248,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 5783
+    },
+    {
+      "epoch": 0.05784,
+      "grad_norm": 0.9665672722542055,
+      "learning_rate": 0.003,
+      "loss": 4.0681,
+      "step": 5784
+    },
+    {
+      "epoch": 0.05785,
+      "grad_norm": 1.0852885954128955,
+      "learning_rate": 0.003,
+      "loss": 4.0851,
+      "step": 5785
+    },
+    {
+      "epoch": 0.05786,
+      "grad_norm": 0.9695946116446583,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 5786
+    },
+    {
+      "epoch": 0.05787,
+      "grad_norm": 1.0650628416583132,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 5787
+    },
+    {
+      "epoch": 0.05788,
+      "grad_norm": 0.7594393281247598,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 5788
+    },
+    {
+      "epoch": 0.05789,
+      "grad_norm": 0.7541805830813078,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 5789
+    },
+    {
+      "epoch": 0.0579,
+      "grad_norm": 0.8652244813315739,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 5790
+    },
+    {
+      "epoch": 0.05791,
+      "grad_norm": 0.8818398092099728,
+      "learning_rate": 0.003,
+      "loss": 4.0567,
+      "step": 5791
+    },
+    {
+      "epoch": 0.05792,
+      "grad_norm": 0.7291575921298588,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 5792
+    },
+    {
+      "epoch": 0.05793,
+      "grad_norm": 0.7978555275990008,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 5793
+    },
+    {
+      "epoch": 0.05794,
+      "grad_norm": 0.9171123664036167,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 5794
+    },
+    {
+      "epoch": 0.05795,
+      "grad_norm": 0.8607991972557187,
+      "learning_rate": 0.003,
+      "loss": 4.0958,
+      "step": 5795
+    },
+    {
+      "epoch": 0.05796,
+      "grad_norm": 0.7463660684838923,
+      "learning_rate": 0.003,
+      "loss": 4.0635,
+      "step": 5796
+    },
+    {
+      "epoch": 0.05797,
+      "grad_norm": 0.7284446091050844,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 5797
+    },
+    {
+      "epoch": 0.05798,
+      "grad_norm": 0.9660475572533875,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 5798
+    },
+    {
+      "epoch": 0.05799,
+      "grad_norm": 1.5107968538940575,
+      "learning_rate": 0.003,
+      "loss": 4.0941,
+      "step": 5799
+    },
+    {
+      "epoch": 0.058,
+      "grad_norm": 0.8675842372107159,
+      "learning_rate": 0.003,
+      "loss": 4.0793,
+      "step": 5800
+    },
+    {
+      "epoch": 0.05801,
+      "grad_norm": 1.0182860429769818,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 5801
+    },
+    {
+      "epoch": 0.05802,
+      "grad_norm": 0.8847441154270007,
+      "learning_rate": 0.003,
+      "loss": 4.0703,
+      "step": 5802
+    },
+    {
+      "epoch": 0.05803,
+      "grad_norm": 0.9726482455890011,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 5803
+    },
+    {
+      "epoch": 0.05804,
+      "grad_norm": 1.0017317638615917,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 5804
+    },
+    {
+      "epoch": 0.05805,
+      "grad_norm": 1.1460034384721858,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 5805
+    },
+    {
+      "epoch": 0.05806,
+      "grad_norm": 0.8265329809662282,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 5806
+    },
+    {
+      "epoch": 0.05807,
+      "grad_norm": 0.7999918311654125,
+      "learning_rate": 0.003,
+      "loss": 4.0749,
+      "step": 5807
+    },
+    {
+      "epoch": 0.05808,
+      "grad_norm": 0.7978362275046719,
+      "learning_rate": 0.003,
+      "loss": 4.0842,
+      "step": 5808
+    },
+    {
+      "epoch": 0.05809,
+      "grad_norm": 0.8854482959350243,
+      "learning_rate": 0.003,
+      "loss": 4.0865,
+      "step": 5809
+    },
+    {
+      "epoch": 0.0581,
+      "grad_norm": 1.0871463000956632,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 5810
+    },
+    {
+      "epoch": 0.05811,
+      "grad_norm": 1.1086945676399056,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 5811
+    },
+    {
+      "epoch": 0.05812,
+      "grad_norm": 0.9304552613092574,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 5812
+    },
+    {
+      "epoch": 0.05813,
+      "grad_norm": 0.9983452335160318,
+      "learning_rate": 0.003,
+      "loss": 4.0698,
+      "step": 5813
+    },
+    {
+      "epoch": 0.05814,
+      "grad_norm": 0.9975951286224842,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 5814
+    },
+    {
+      "epoch": 0.05815,
+      "grad_norm": 0.8924647102734461,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 5815
+    },
+    {
+      "epoch": 0.05816,
+      "grad_norm": 0.9225284620016561,
+      "learning_rate": 0.003,
+      "loss": 4.0736,
+      "step": 5816
+    },
+    {
+      "epoch": 0.05817,
+      "grad_norm": 0.9979994283490636,
+      "learning_rate": 0.003,
+      "loss": 4.0855,
+      "step": 5817
+    },
+    {
+      "epoch": 0.05818,
+      "grad_norm": 1.0439768447035485,
+      "learning_rate": 0.003,
+      "loss": 4.0675,
+      "step": 5818
+    },
+    {
+      "epoch": 0.05819,
+      "grad_norm": 1.033649032838979,
+      "learning_rate": 0.003,
+      "loss": 4.06,
+      "step": 5819
+    },
+    {
+      "epoch": 0.0582,
+      "grad_norm": 1.1096343593879232,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 5820
+    },
+    {
+      "epoch": 0.05821,
+      "grad_norm": 0.9004393239006013,
+      "learning_rate": 0.003,
+      "loss": 4.0785,
+      "step": 5821
+    },
+    {
+      "epoch": 0.05822,
+      "grad_norm": 1.0863173751230966,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 5822
+    },
+    {
+      "epoch": 0.05823,
+      "grad_norm": 1.0599324875126475,
+      "learning_rate": 0.003,
+      "loss": 4.0747,
+      "step": 5823
+    },
+    {
+      "epoch": 0.05824,
+      "grad_norm": 0.8876300345105752,
+      "learning_rate": 0.003,
+      "loss": 4.0721,
+      "step": 5824
+    },
+    {
+      "epoch": 0.05825,
+      "grad_norm": 1.0008579052062283,
+      "learning_rate": 0.003,
+      "loss": 4.0976,
+      "step": 5825
+    },
+    {
+      "epoch": 0.05826,
+      "grad_norm": 1.1748225539903427,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 5826
+    },
+    {
+      "epoch": 0.05827,
+      "grad_norm": 0.9593447519304307,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 5827
+    },
+    {
+      "epoch": 0.05828,
+      "grad_norm": 1.0225296000471948,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 5828
+    },
+    {
+      "epoch": 0.05829,
+      "grad_norm": 1.0560219694488389,
+      "learning_rate": 0.003,
+      "loss": 4.0774,
+      "step": 5829
+    },
+    {
+      "epoch": 0.0583,
+      "grad_norm": 0.9860898705161171,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 5830
+    },
+    {
+      "epoch": 0.05831,
+      "grad_norm": 1.1250031799560922,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 5831
+    },
+    {
+      "epoch": 0.05832,
+      "grad_norm": 1.157236041461952,
+      "learning_rate": 0.003,
+      "loss": 4.0901,
+      "step": 5832
+    },
+    {
+      "epoch": 0.05833,
+      "grad_norm": 0.978478470266213,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 5833
+    },
+    {
+      "epoch": 0.05834,
+      "grad_norm": 0.9798668444646972,
+      "learning_rate": 0.003,
+      "loss": 4.0828,
+      "step": 5834
+    },
+    {
+      "epoch": 0.05835,
+      "grad_norm": 0.7832001924434391,
+      "learning_rate": 0.003,
+      "loss": 4.0929,
+      "step": 5835
+    },
+    {
+      "epoch": 0.05836,
+      "grad_norm": 0.8249432319588443,
+      "learning_rate": 0.003,
+      "loss": 4.074,
+      "step": 5836
+    },
+    {
+      "epoch": 0.05837,
+      "grad_norm": 0.9729603126849787,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 5837
+    },
+    {
+      "epoch": 0.05838,
+      "grad_norm": 0.9502514262519923,
+      "learning_rate": 0.003,
+      "loss": 4.076,
+      "step": 5838
+    },
+    {
+      "epoch": 0.05839,
+      "grad_norm": 0.9267029498844601,
+      "learning_rate": 0.003,
+      "loss": 4.0745,
+      "step": 5839
+    },
+    {
+      "epoch": 0.0584,
+      "grad_norm": 0.9657962011078538,
+      "learning_rate": 0.003,
+      "loss": 4.0937,
+      "step": 5840
+    },
+    {
+      "epoch": 0.05841,
+      "grad_norm": 1.0401049872356143,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 5841
+    },
+    {
+      "epoch": 0.05842,
+      "grad_norm": 1.1291241708881259,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 5842
+    },
+    {
+      "epoch": 0.05843,
+      "grad_norm": 1.0657328740989145,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 5843
+    },
+    {
+      "epoch": 0.05844,
+      "grad_norm": 0.936956030075049,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 5844
+    },
+    {
+      "epoch": 0.05845,
+      "grad_norm": 0.954122564913459,
+      "learning_rate": 0.003,
+      "loss": 4.063,
+      "step": 5845
+    },
+    {
+      "epoch": 0.05846,
+      "grad_norm": 1.0055872032953703,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 5846
+    },
+    {
+      "epoch": 0.05847,
+      "grad_norm": 1.0847001970093764,
+      "learning_rate": 0.003,
+      "loss": 4.0895,
+      "step": 5847
+    },
+    {
+      "epoch": 0.05848,
+      "grad_norm": 0.9328803987155841,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 5848
+    },
+    {
+      "epoch": 0.05849,
+      "grad_norm": 1.0216731452531402,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 5849
+    },
+    {
+      "epoch": 0.0585,
+      "grad_norm": 1.042034861189281,
+      "learning_rate": 0.003,
+      "loss": 4.0597,
+      "step": 5850
+    },
+    {
+      "epoch": 0.05851,
+      "grad_norm": 0.9696375907305427,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 5851
+    },
+    {
+      "epoch": 0.05852,
+      "grad_norm": 0.9548659469705912,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 5852
+    },
+    {
+      "epoch": 0.05853,
+      "grad_norm": 0.9934590507196531,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 5853
+    },
+    {
+      "epoch": 0.05854,
+      "grad_norm": 1.109837806391397,
+      "learning_rate": 0.003,
+      "loss": 4.1178,
+      "step": 5854
+    },
+    {
+      "epoch": 0.05855,
+      "grad_norm": 0.9612662531040636,
+      "learning_rate": 0.003,
+      "loss": 4.0724,
+      "step": 5855
+    },
+    {
+      "epoch": 0.05856,
+      "grad_norm": 0.9262688978815322,
+      "learning_rate": 0.003,
+      "loss": 4.0639,
+      "step": 5856
+    },
+    {
+      "epoch": 0.05857,
+      "grad_norm": 1.0672841758813956,
+      "learning_rate": 0.003,
+      "loss": 4.0825,
+      "step": 5857
+    },
+    {
+      "epoch": 0.05858,
+      "grad_norm": 0.9924933017958931,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 5858
+    },
+    {
+      "epoch": 0.05859,
+      "grad_norm": 1.1446711154327815,
+      "learning_rate": 0.003,
+      "loss": 4.076,
+      "step": 5859
+    },
+    {
+      "epoch": 0.0586,
+      "grad_norm": 0.9395157838833672,
+      "learning_rate": 0.003,
+      "loss": 4.0778,
+      "step": 5860
+    },
+    {
+      "epoch": 0.05861,
+      "grad_norm": 1.017783268004518,
+      "learning_rate": 0.003,
+      "loss": 4.0939,
+      "step": 5861
+    },
+    {
+      "epoch": 0.05862,
+      "grad_norm": 0.8461181473215044,
+      "learning_rate": 0.003,
+      "loss": 4.0831,
+      "step": 5862
+    },
+    {
+      "epoch": 0.05863,
+      "grad_norm": 0.727622134447459,
+      "learning_rate": 0.003,
+      "loss": 4.0781,
+      "step": 5863
+    },
+    {
+      "epoch": 0.05864,
+      "grad_norm": 0.7787266994865726,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 5864
+    },
+    {
+      "epoch": 0.05865,
+      "grad_norm": 0.9457836140127373,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 5865
+    },
+    {
+      "epoch": 0.05866,
+      "grad_norm": 1.1590380983819644,
+      "learning_rate": 0.003,
+      "loss": 4.079,
+      "step": 5866
+    },
+    {
+      "epoch": 0.05867,
+      "grad_norm": 0.8661814704210331,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 5867
+    },
+    {
+      "epoch": 0.05868,
+      "grad_norm": 0.7841997946264219,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 5868
+    },
+    {
+      "epoch": 0.05869,
+      "grad_norm": 0.9021971418732975,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 5869
+    },
+    {
+      "epoch": 0.0587,
+      "grad_norm": 1.044429489265089,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 5870
+    },
+    {
+      "epoch": 0.05871,
+      "grad_norm": 0.9528123707330137,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 5871
+    },
+    {
+      "epoch": 0.05872,
+      "grad_norm": 1.0247575002414453,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 5872
+    },
+    {
+      "epoch": 0.05873,
+      "grad_norm": 1.1855135281087743,
+      "learning_rate": 0.003,
+      "loss": 4.0769,
+      "step": 5873
+    },
+    {
+      "epoch": 0.05874,
+      "grad_norm": 0.9873466030307598,
+      "learning_rate": 0.003,
+      "loss": 4.0633,
+      "step": 5874
+    },
+    {
+      "epoch": 0.05875,
+      "grad_norm": 1.1663397858316737,
+      "learning_rate": 0.003,
+      "loss": 4.078,
+      "step": 5875
+    },
+    {
+      "epoch": 0.05876,
+      "grad_norm": 1.2258462137763335,
+      "learning_rate": 0.003,
+      "loss": 4.0736,
+      "step": 5876
+    },
+    {
+      "epoch": 0.05877,
+      "grad_norm": 1.0386697354012882,
+      "learning_rate": 0.003,
+      "loss": 4.0784,
+      "step": 5877
+    },
+    {
+      "epoch": 0.05878,
+      "grad_norm": 1.183742461564236,
+      "learning_rate": 0.003,
+      "loss": 4.0912,
+      "step": 5878
+    },
+    {
+      "epoch": 0.05879,
+      "grad_norm": 0.940962143351869,
+      "learning_rate": 0.003,
+      "loss": 4.0987,
+      "step": 5879
+    },
+    {
+      "epoch": 0.0588,
+      "grad_norm": 0.927043037474344,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 5880
+    },
+    {
+      "epoch": 0.05881,
+      "grad_norm": 0.8962578128398271,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 5881
+    },
+    {
+      "epoch": 0.05882,
+      "grad_norm": 0.9066915820041124,
+      "learning_rate": 0.003,
+      "loss": 4.0801,
+      "step": 5882
+    },
+    {
+      "epoch": 0.05883,
+      "grad_norm": 0.8598805143317192,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 5883
+    },
+    {
+      "epoch": 0.05884,
+      "grad_norm": 1.0114326319466251,
+      "learning_rate": 0.003,
+      "loss": 4.063,
+      "step": 5884
+    },
+    {
+      "epoch": 0.05885,
+      "grad_norm": 1.1324207199773164,
+      "learning_rate": 0.003,
+      "loss": 4.084,
+      "step": 5885
+    },
+    {
+      "epoch": 0.05886,
+      "grad_norm": 0.9079205076328346,
+      "learning_rate": 0.003,
+      "loss": 4.0703,
+      "step": 5886
+    },
+    {
+      "epoch": 0.05887,
+      "grad_norm": 0.9499292804842742,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 5887
+    },
+    {
+      "epoch": 0.05888,
+      "grad_norm": 0.9253742487047153,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 5888
+    },
+    {
+      "epoch": 0.05889,
+      "grad_norm": 0.9756593540292708,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 5889
+    },
+    {
+      "epoch": 0.0589,
+      "grad_norm": 1.2153968005149314,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 5890
+    },
+    {
+      "epoch": 0.05891,
+      "grad_norm": 1.1050096751560372,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 5891
+    },
+    {
+      "epoch": 0.05892,
+      "grad_norm": 1.0123025142968227,
+      "learning_rate": 0.003,
+      "loss": 4.0945,
+      "step": 5892
+    },
+    {
+      "epoch": 0.05893,
+      "grad_norm": 0.9835328234583312,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 5893
+    },
+    {
+      "epoch": 0.05894,
+      "grad_norm": 0.9304265933341809,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 5894
+    },
+    {
+      "epoch": 0.05895,
+      "grad_norm": 0.9274436106212893,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 5895
+    },
+    {
+      "epoch": 0.05896,
+      "grad_norm": 0.8771867029491986,
+      "learning_rate": 0.003,
+      "loss": 4.0752,
+      "step": 5896
+    },
+    {
+      "epoch": 0.05897,
+      "grad_norm": 0.8218952866171018,
+      "learning_rate": 0.003,
+      "loss": 4.0808,
+      "step": 5897
+    },
+    {
+      "epoch": 0.05898,
+      "grad_norm": 0.9430573967743275,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 5898
+    },
+    {
+      "epoch": 0.05899,
+      "grad_norm": 0.8911678825157316,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 5899
+    },
+    {
+      "epoch": 0.059,
+      "grad_norm": 0.9174879047193627,
+      "learning_rate": 0.003,
+      "loss": 4.0694,
+      "step": 5900
+    },
+    {
+      "epoch": 0.05901,
+      "grad_norm": 0.9824565061579317,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 5901
+    },
+    {
+      "epoch": 0.05902,
+      "grad_norm": 1.240010288158423,
+      "learning_rate": 0.003,
+      "loss": 4.0791,
+      "step": 5902
+    },
+    {
+      "epoch": 0.05903,
+      "grad_norm": 0.7639178515927444,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 5903
+    },
+    {
+      "epoch": 0.05904,
+      "grad_norm": 0.770306018643173,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 5904
+    },
+    {
+      "epoch": 0.05905,
+      "grad_norm": 0.976148342284057,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 5905
+    },
+    {
+      "epoch": 0.05906,
+      "grad_norm": 1.1812920524478123,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 5906
+    },
+    {
+      "epoch": 0.05907,
+      "grad_norm": 1.1654915411869895,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 5907
+    },
+    {
+      "epoch": 0.05908,
+      "grad_norm": 1.120465653132924,
+      "learning_rate": 0.003,
+      "loss": 4.0779,
+      "step": 5908
+    },
+    {
+      "epoch": 0.05909,
+      "grad_norm": 0.8539512494382139,
+      "learning_rate": 0.003,
+      "loss": 4.0826,
+      "step": 5909
+    },
+    {
+      "epoch": 0.0591,
+      "grad_norm": 0.8227369203330751,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 5910
+    },
+    {
+      "epoch": 0.05911,
+      "grad_norm": 0.8906442288033303,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 5911
+    },
+    {
+      "epoch": 0.05912,
+      "grad_norm": 0.9509533493887127,
+      "learning_rate": 0.003,
+      "loss": 4.0874,
+      "step": 5912
+    },
+    {
+      "epoch": 0.05913,
+      "grad_norm": 1.0981979298361981,
+      "learning_rate": 0.003,
+      "loss": 4.1101,
+      "step": 5913
+    },
+    {
+      "epoch": 0.05914,
+      "grad_norm": 0.9415620615571133,
+      "learning_rate": 0.003,
+      "loss": 4.0824,
+      "step": 5914
+    },
+    {
+      "epoch": 0.05915,
+      "grad_norm": 0.8478332715846145,
+      "learning_rate": 0.003,
+      "loss": 4.0783,
+      "step": 5915
+    },
+    {
+      "epoch": 0.05916,
+      "grad_norm": 0.8256880500590459,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 5916
+    },
+    {
+      "epoch": 0.05917,
+      "grad_norm": 0.9790506827233469,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 5917
+    },
+    {
+      "epoch": 0.05918,
+      "grad_norm": 1.0353580913196638,
+      "learning_rate": 0.003,
+      "loss": 4.0617,
+      "step": 5918
+    },
+    {
+      "epoch": 0.05919,
+      "grad_norm": 0.9651964696003633,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 5919
+    },
+    {
+      "epoch": 0.0592,
+      "grad_norm": 0.937291761754371,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 5920
+    },
+    {
+      "epoch": 0.05921,
+      "grad_norm": 1.0990872808141368,
+      "learning_rate": 0.003,
+      "loss": 4.0815,
+      "step": 5921
+    },
+    {
+      "epoch": 0.05922,
+      "grad_norm": 1.0468886911915498,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 5922
+    },
+    {
+      "epoch": 0.05923,
+      "grad_norm": 1.0323600516250746,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 5923
+    },
+    {
+      "epoch": 0.05924,
+      "grad_norm": 0.954016613180186,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 5924
+    },
+    {
+      "epoch": 0.05925,
+      "grad_norm": 0.9475491801291553,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 5925
+    },
+    {
+      "epoch": 0.05926,
+      "grad_norm": 0.9668145623340975,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 5926
+    },
+    {
+      "epoch": 0.05927,
+      "grad_norm": 1.0595618769436952,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 5927
+    },
+    {
+      "epoch": 0.05928,
+      "grad_norm": 0.9855002059444532,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 5928
+    },
+    {
+      "epoch": 0.05929,
+      "grad_norm": 0.945307973831574,
+      "learning_rate": 0.003,
+      "loss": 4.0726,
+      "step": 5929
+    },
+    {
+      "epoch": 0.0593,
+      "grad_norm": 0.9657299567775556,
+      "learning_rate": 0.003,
+      "loss": 4.0783,
+      "step": 5930
+    },
+    {
+      "epoch": 0.05931,
+      "grad_norm": 0.9827228555033615,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 5931
+    },
+    {
+      "epoch": 0.05932,
+      "grad_norm": 1.156077533419125,
+      "learning_rate": 0.003,
+      "loss": 4.0928,
+      "step": 5932
+    },
+    {
+      "epoch": 0.05933,
+      "grad_norm": 0.847492777721161,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 5933
+    },
+    {
+      "epoch": 0.05934,
+      "grad_norm": 1.0021937329107191,
+      "learning_rate": 0.003,
+      "loss": 4.0961,
+      "step": 5934
+    },
+    {
+      "epoch": 0.05935,
+      "grad_norm": 1.1159732242811857,
+      "learning_rate": 0.003,
+      "loss": 4.0833,
+      "step": 5935
+    },
+    {
+      "epoch": 0.05936,
+      "grad_norm": 0.899401377373719,
+      "learning_rate": 0.003,
+      "loss": 4.0751,
+      "step": 5936
+    },
+    {
+      "epoch": 0.05937,
+      "grad_norm": 0.8301027331575037,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 5937
+    },
+    {
+      "epoch": 0.05938,
+      "grad_norm": 0.8762725606178311,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 5938
+    },
+    {
+      "epoch": 0.05939,
+      "grad_norm": 0.9126590668115435,
+      "learning_rate": 0.003,
+      "loss": 4.0604,
+      "step": 5939
+    },
+    {
+      "epoch": 0.0594,
+      "grad_norm": 1.0407767907462304,
+      "learning_rate": 0.003,
+      "loss": 4.0811,
+      "step": 5940
+    },
+    {
+      "epoch": 0.05941,
+      "grad_norm": 1.1143892345234092,
+      "learning_rate": 0.003,
+      "loss": 4.0837,
+      "step": 5941
+    },
+    {
+      "epoch": 0.05942,
+      "grad_norm": 1.0961434922540771,
+      "learning_rate": 0.003,
+      "loss": 4.0719,
+      "step": 5942
+    },
+    {
+      "epoch": 0.05943,
+      "grad_norm": 1.0471390550371067,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 5943
+    },
+    {
+      "epoch": 0.05944,
+      "grad_norm": 0.9579622530263362,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 5944
+    },
+    {
+      "epoch": 0.05945,
+      "grad_norm": 0.9926199469417607,
+      "learning_rate": 0.003,
+      "loss": 4.0566,
+      "step": 5945
+    },
+    {
+      "epoch": 0.05946,
+      "grad_norm": 1.0156252547795441,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 5946
+    },
+    {
+      "epoch": 0.05947,
+      "grad_norm": 1.2611887927782788,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 5947
+    },
+    {
+      "epoch": 0.05948,
+      "grad_norm": 0.9815730257128612,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 5948
+    },
+    {
+      "epoch": 0.05949,
+      "grad_norm": 1.0471598623999925,
+      "learning_rate": 0.003,
+      "loss": 4.0611,
+      "step": 5949
+    },
+    {
+      "epoch": 0.0595,
+      "grad_norm": 0.8842194048874816,
+      "learning_rate": 0.003,
+      "loss": 4.0679,
+      "step": 5950
+    },
+    {
+      "epoch": 0.05951,
+      "grad_norm": 0.8826262827071101,
+      "learning_rate": 0.003,
+      "loss": 4.0586,
+      "step": 5951
+    },
+    {
+      "epoch": 0.05952,
+      "grad_norm": 0.8874639709587795,
+      "learning_rate": 0.003,
+      "loss": 4.0719,
+      "step": 5952
+    },
+    {
+      "epoch": 0.05953,
+      "grad_norm": 1.094346129739514,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 5953
+    },
+    {
+      "epoch": 0.05954,
+      "grad_norm": 1.0405342399228923,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 5954
+    },
+    {
+      "epoch": 0.05955,
+      "grad_norm": 1.0084545393678892,
+      "learning_rate": 0.003,
+      "loss": 4.0575,
+      "step": 5955
+    },
+    {
+      "epoch": 0.05956,
+      "grad_norm": 0.8681329007495994,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 5956
+    },
+    {
+      "epoch": 0.05957,
+      "grad_norm": 0.9787011417999288,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 5957
+    },
+    {
+      "epoch": 0.05958,
+      "grad_norm": 0.983255913377767,
+      "learning_rate": 0.003,
+      "loss": 4.0624,
+      "step": 5958
+    },
+    {
+      "epoch": 0.05959,
+      "grad_norm": 0.9397652065747355,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 5959
+    },
+    {
+      "epoch": 0.0596,
+      "grad_norm": 0.9197247886041665,
+      "learning_rate": 0.003,
+      "loss": 4.0784,
+      "step": 5960
+    },
+    {
+      "epoch": 0.05961,
+      "grad_norm": 0.975726486158997,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 5961
+    },
+    {
+      "epoch": 0.05962,
+      "grad_norm": 1.0416902503797958,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 5962
+    },
+    {
+      "epoch": 0.05963,
+      "grad_norm": 1.0805802820393233,
+      "learning_rate": 0.003,
+      "loss": 4.0699,
+      "step": 5963
+    },
+    {
+      "epoch": 0.05964,
+      "grad_norm": 0.9125810985534502,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 5964
+    },
+    {
+      "epoch": 0.05965,
+      "grad_norm": 0.7839568155281216,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 5965
+    },
+    {
+      "epoch": 0.05966,
+      "grad_norm": 0.645579903658523,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 5966
+    },
+    {
+      "epoch": 0.05967,
+      "grad_norm": 0.6174059038324572,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 5967
+    },
+    {
+      "epoch": 0.05968,
+      "grad_norm": 0.6892480108767447,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 5968
+    },
+    {
+      "epoch": 0.05969,
+      "grad_norm": 0.7254045272144456,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 5969
+    },
+    {
+      "epoch": 0.0597,
+      "grad_norm": 0.7953745734437678,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 5970
+    },
+    {
+      "epoch": 0.05971,
+      "grad_norm": 1.0581208502754886,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 5971
+    },
+    {
+      "epoch": 0.05972,
+      "grad_norm": 1.1798253869062356,
+      "learning_rate": 0.003,
+      "loss": 4.0845,
+      "step": 5972
+    },
+    {
+      "epoch": 0.05973,
+      "grad_norm": 0.7544716901026084,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 5973
+    },
+    {
+      "epoch": 0.05974,
+      "grad_norm": 0.7411406547879388,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 5974
+    },
+    {
+      "epoch": 0.05975,
+      "grad_norm": 0.664439857328635,
+      "learning_rate": 0.003,
+      "loss": 4.0683,
+      "step": 5975
+    },
+    {
+      "epoch": 0.05976,
+      "grad_norm": 0.684227141132107,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 5976
+    },
+    {
+      "epoch": 0.05977,
+      "grad_norm": 0.8888845505907672,
+      "learning_rate": 0.003,
+      "loss": 4.0621,
+      "step": 5977
+    },
+    {
+      "epoch": 0.05978,
+      "grad_norm": 1.1416281876950884,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 5978
+    },
+    {
+      "epoch": 0.05979,
+      "grad_norm": 1.008454254020505,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 5979
+    },
+    {
+      "epoch": 0.0598,
+      "grad_norm": 1.190632055338276,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 5980
+    },
+    {
+      "epoch": 0.05981,
+      "grad_norm": 1.015356754441631,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 5981
+    },
+    {
+      "epoch": 0.05982,
+      "grad_norm": 0.9378130097416635,
+      "learning_rate": 0.003,
+      "loss": 4.0579,
+      "step": 5982
+    },
+    {
+      "epoch": 0.05983,
+      "grad_norm": 0.8053048489511819,
+      "learning_rate": 0.003,
+      "loss": 4.0703,
+      "step": 5983
+    },
+    {
+      "epoch": 0.05984,
+      "grad_norm": 0.9819349814850642,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 5984
+    },
+    {
+      "epoch": 0.05985,
+      "grad_norm": 1.1131918756985963,
+      "learning_rate": 0.003,
+      "loss": 4.0802,
+      "step": 5985
+    },
+    {
+      "epoch": 0.05986,
+      "grad_norm": 0.9174366752153941,
+      "learning_rate": 0.003,
+      "loss": 4.0741,
+      "step": 5986
+    },
+    {
+      "epoch": 0.05987,
+      "grad_norm": 1.0378256913235493,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 5987
+    },
+    {
+      "epoch": 0.05988,
+      "grad_norm": 0.9258303467488307,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 5988
+    },
+    {
+      "epoch": 0.05989,
+      "grad_norm": 0.9632304797304597,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 5989
+    },
+    {
+      "epoch": 0.0599,
+      "grad_norm": 0.8935903017412864,
+      "learning_rate": 0.003,
+      "loss": 4.0597,
+      "step": 5990
+    },
+    {
+      "epoch": 0.05991,
+      "grad_norm": 0.6739742445063621,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 5991
+    },
+    {
+      "epoch": 0.05992,
+      "grad_norm": 0.5915614354430911,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 5992
+    },
+    {
+      "epoch": 0.05993,
+      "grad_norm": 0.741024166202095,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 5993
+    },
+    {
+      "epoch": 0.05994,
+      "grad_norm": 0.8841179715786665,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 5994
+    },
+    {
+      "epoch": 0.05995,
+      "grad_norm": 1.287538920218187,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 5995
+    },
+    {
+      "epoch": 0.05996,
+      "grad_norm": 1.0684098332314382,
+      "learning_rate": 0.003,
+      "loss": 4.0921,
+      "step": 5996
+    },
+    {
+      "epoch": 0.05997,
+      "grad_norm": 0.9397674955044001,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 5997
+    },
+    {
+      "epoch": 0.05998,
+      "grad_norm": 0.8272889396102959,
+      "learning_rate": 0.003,
+      "loss": 4.0826,
+      "step": 5998
+    },
+    {
+      "epoch": 0.05999,
+      "grad_norm": 0.8991148255094739,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 5999
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 0.9185220408167984,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 6000
+    },
+    {
+      "epoch": 0.06001,
+      "grad_norm": 0.9908154950299144,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 6001
+    },
+    {
+      "epoch": 0.06002,
+      "grad_norm": 0.9608103918830759,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 6002
+    },
+    {
+      "epoch": 0.06003,
+      "grad_norm": 1.1215832189472665,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 6003
+    },
+    {
+      "epoch": 0.06004,
+      "grad_norm": 1.0570354371095005,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 6004
+    },
+    {
+      "epoch": 0.06005,
+      "grad_norm": 1.0877288777978935,
+      "learning_rate": 0.003,
+      "loss": 4.0638,
+      "step": 6005
+    },
+    {
+      "epoch": 0.06006,
+      "grad_norm": 1.0530975014128934,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 6006
+    },
+    {
+      "epoch": 0.06007,
+      "grad_norm": 0.8755728290132648,
+      "learning_rate": 0.003,
+      "loss": 4.0921,
+      "step": 6007
+    },
+    {
+      "epoch": 0.06008,
+      "grad_norm": 0.9019088575660325,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 6008
+    },
+    {
+      "epoch": 0.06009,
+      "grad_norm": 1.1772437890538816,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 6009
+    },
+    {
+      "epoch": 0.0601,
+      "grad_norm": 0.9527109892195108,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 6010
+    },
+    {
+      "epoch": 0.06011,
+      "grad_norm": 1.1738757885020588,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 6011
+    },
+    {
+      "epoch": 0.06012,
+      "grad_norm": 0.8780691584223036,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 6012
+    },
+    {
+      "epoch": 0.06013,
+      "grad_norm": 0.9179139070213036,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 6013
+    },
+    {
+      "epoch": 0.06014,
+      "grad_norm": 1.00855448654704,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 6014
+    },
+    {
+      "epoch": 0.06015,
+      "grad_norm": 0.9972560180447895,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 6015
+    },
+    {
+      "epoch": 0.06016,
+      "grad_norm": 1.0775312684009026,
+      "learning_rate": 0.003,
+      "loss": 4.0771,
+      "step": 6016
+    },
+    {
+      "epoch": 0.06017,
+      "grad_norm": 1.33609316921924,
+      "learning_rate": 0.003,
+      "loss": 4.0869,
+      "step": 6017
+    },
+    {
+      "epoch": 0.06018,
+      "grad_norm": 1.0280471651185956,
+      "learning_rate": 0.003,
+      "loss": 4.1001,
+      "step": 6018
+    },
+    {
+      "epoch": 0.06019,
+      "grad_norm": 1.0585602447833204,
+      "learning_rate": 0.003,
+      "loss": 4.0658,
+      "step": 6019
+    },
+    {
+      "epoch": 0.0602,
+      "grad_norm": 0.9692400850494943,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 6020
+    },
+    {
+      "epoch": 0.06021,
+      "grad_norm": 1.0415586611820176,
+      "learning_rate": 0.003,
+      "loss": 4.0803,
+      "step": 6021
+    },
+    {
+      "epoch": 0.06022,
+      "grad_norm": 1.055493914814788,
+      "learning_rate": 0.003,
+      "loss": 4.0875,
+      "step": 6022
+    },
+    {
+      "epoch": 0.06023,
+      "grad_norm": 1.1437368963667776,
+      "learning_rate": 0.003,
+      "loss": 4.0728,
+      "step": 6023
+    },
+    {
+      "epoch": 0.06024,
+      "grad_norm": 1.0461727220486359,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 6024
+    },
+    {
+      "epoch": 0.06025,
+      "grad_norm": 0.8495924516845805,
+      "learning_rate": 0.003,
+      "loss": 4.0979,
+      "step": 6025
+    },
+    {
+      "epoch": 0.06026,
+      "grad_norm": 0.8387211090444705,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 6026
+    },
+    {
+      "epoch": 0.06027,
+      "grad_norm": 0.9070801722224514,
+      "learning_rate": 0.003,
+      "loss": 4.0586,
+      "step": 6027
+    },
+    {
+      "epoch": 0.06028,
+      "grad_norm": 0.9625380625910851,
+      "learning_rate": 0.003,
+      "loss": 4.0843,
+      "step": 6028
+    },
+    {
+      "epoch": 0.06029,
+      "grad_norm": 1.0682457520452102,
+      "learning_rate": 0.003,
+      "loss": 4.0803,
+      "step": 6029
+    },
+    {
+      "epoch": 0.0603,
+      "grad_norm": 0.9976915475426508,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 6030
+    },
+    {
+      "epoch": 0.06031,
+      "grad_norm": 1.0453095190042574,
+      "learning_rate": 0.003,
+      "loss": 4.0703,
+      "step": 6031
+    },
+    {
+      "epoch": 0.06032,
+      "grad_norm": 0.9786653778595836,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 6032
+    },
+    {
+      "epoch": 0.06033,
+      "grad_norm": 1.0968968530547012,
+      "learning_rate": 0.003,
+      "loss": 4.0781,
+      "step": 6033
+    },
+    {
+      "epoch": 0.06034,
+      "grad_norm": 0.9514397371995961,
+      "learning_rate": 0.003,
+      "loss": 4.0905,
+      "step": 6034
+    },
+    {
+      "epoch": 0.06035,
+      "grad_norm": 1.0541817557308053,
+      "learning_rate": 0.003,
+      "loss": 4.0842,
+      "step": 6035
+    },
+    {
+      "epoch": 0.06036,
+      "grad_norm": 0.8780263970411091,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 6036
+    },
+    {
+      "epoch": 0.06037,
+      "grad_norm": 1.0193481447935269,
+      "learning_rate": 0.003,
+      "loss": 4.0737,
+      "step": 6037
+    },
+    {
+      "epoch": 0.06038,
+      "grad_norm": 1.3774676565861503,
+      "learning_rate": 0.003,
+      "loss": 4.0672,
+      "step": 6038
+    },
+    {
+      "epoch": 0.06039,
+      "grad_norm": 0.7252838059348526,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 6039
+    },
+    {
+      "epoch": 0.0604,
+      "grad_norm": 0.78601742451761,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 6040
+    },
+    {
+      "epoch": 0.06041,
+      "grad_norm": 0.744786447804636,
+      "learning_rate": 0.003,
+      "loss": 4.0768,
+      "step": 6041
+    },
+    {
+      "epoch": 0.06042,
+      "grad_norm": 0.8660278386592278,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 6042
+    },
+    {
+      "epoch": 0.06043,
+      "grad_norm": 1.1071573404697885,
+      "learning_rate": 0.003,
+      "loss": 4.0668,
+      "step": 6043
+    },
+    {
+      "epoch": 0.06044,
+      "grad_norm": 1.0894101622044046,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 6044
+    },
+    {
+      "epoch": 0.06045,
+      "grad_norm": 1.0493254983595819,
+      "learning_rate": 0.003,
+      "loss": 4.0649,
+      "step": 6045
+    },
+    {
+      "epoch": 0.06046,
+      "grad_norm": 1.0049917198936318,
+      "learning_rate": 0.003,
+      "loss": 4.0694,
+      "step": 6046
+    },
+    {
+      "epoch": 0.06047,
+      "grad_norm": 1.0517277702815098,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 6047
+    },
+    {
+      "epoch": 0.06048,
+      "grad_norm": 1.1809116249025131,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 6048
+    },
+    {
+      "epoch": 0.06049,
+      "grad_norm": 0.9623062437443599,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 6049
+    },
+    {
+      "epoch": 0.0605,
+      "grad_norm": 1.0923522103941121,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 6050
+    },
+    {
+      "epoch": 0.06051,
+      "grad_norm": 1.0265427925273334,
+      "learning_rate": 0.003,
+      "loss": 4.0854,
+      "step": 6051
+    },
+    {
+      "epoch": 0.06052,
+      "grad_norm": 0.9414887816443991,
+      "learning_rate": 0.003,
+      "loss": 4.0823,
+      "step": 6052
+    },
+    {
+      "epoch": 0.06053,
+      "grad_norm": 1.006065905595286,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 6053
+    },
+    {
+      "epoch": 0.06054,
+      "grad_norm": 1.095910787052634,
+      "learning_rate": 0.003,
+      "loss": 4.0807,
+      "step": 6054
+    },
+    {
+      "epoch": 0.06055,
+      "grad_norm": 0.8517266184871414,
+      "learning_rate": 0.003,
+      "loss": 4.0602,
+      "step": 6055
+    },
+    {
+      "epoch": 0.06056,
+      "grad_norm": 1.1390125640686093,
+      "learning_rate": 0.003,
+      "loss": 4.0632,
+      "step": 6056
+    },
+    {
+      "epoch": 0.06057,
+      "grad_norm": 1.0285564034956292,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 6057
+    },
+    {
+      "epoch": 0.06058,
+      "grad_norm": 1.0733695934055711,
+      "learning_rate": 0.003,
+      "loss": 4.0643,
+      "step": 6058
+    },
+    {
+      "epoch": 0.06059,
+      "grad_norm": 0.8565845158282416,
+      "learning_rate": 0.003,
+      "loss": 4.0698,
+      "step": 6059
+    },
+    {
+      "epoch": 0.0606,
+      "grad_norm": 0.8864003378041345,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 6060
+    },
+    {
+      "epoch": 0.06061,
+      "grad_norm": 1.0109993363665308,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 6061
+    },
+    {
+      "epoch": 0.06062,
+      "grad_norm": 1.123107920080809,
+      "learning_rate": 0.003,
+      "loss": 4.083,
+      "step": 6062
+    },
+    {
+      "epoch": 0.06063,
+      "grad_norm": 0.949609814613259,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 6063
+    },
+    {
+      "epoch": 0.06064,
+      "grad_norm": 0.9981845823778703,
+      "learning_rate": 0.003,
+      "loss": 4.0938,
+      "step": 6064
+    },
+    {
+      "epoch": 0.06065,
+      "grad_norm": 1.1017349391298685,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 6065
+    },
+    {
+      "epoch": 0.06066,
+      "grad_norm": 1.0732527006387818,
+      "learning_rate": 0.003,
+      "loss": 4.098,
+      "step": 6066
+    },
+    {
+      "epoch": 0.06067,
+      "grad_norm": 0.9294025545570336,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 6067
+    },
+    {
+      "epoch": 0.06068,
+      "grad_norm": 1.0576231179156892,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 6068
+    },
+    {
+      "epoch": 0.06069,
+      "grad_norm": 1.0865552407589456,
+      "learning_rate": 0.003,
+      "loss": 4.0697,
+      "step": 6069
+    },
+    {
+      "epoch": 0.0607,
+      "grad_norm": 1.0230751617256044,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 6070
+    },
+    {
+      "epoch": 0.06071,
+      "grad_norm": 0.999221755293697,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 6071
+    },
+    {
+      "epoch": 0.06072,
+      "grad_norm": 1.066655819343783,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 6072
+    },
+    {
+      "epoch": 0.06073,
+      "grad_norm": 1.0369526156758926,
+      "learning_rate": 0.003,
+      "loss": 4.0809,
+      "step": 6073
+    },
+    {
+      "epoch": 0.06074,
+      "grad_norm": 1.075598582264553,
+      "learning_rate": 0.003,
+      "loss": 4.0639,
+      "step": 6074
+    },
+    {
+      "epoch": 0.06075,
+      "grad_norm": 0.9953552796029778,
+      "learning_rate": 0.003,
+      "loss": 4.0863,
+      "step": 6075
+    },
+    {
+      "epoch": 0.06076,
+      "grad_norm": 0.9778883253174944,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 6076
+    },
+    {
+      "epoch": 0.06077,
+      "grad_norm": 0.8468031501573927,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 6077
+    },
+    {
+      "epoch": 0.06078,
+      "grad_norm": 0.8305408752084648,
+      "learning_rate": 0.003,
+      "loss": 4.0692,
+      "step": 6078
+    },
+    {
+      "epoch": 0.06079,
+      "grad_norm": 0.7960215844617263,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 6079
+    },
+    {
+      "epoch": 0.0608,
+      "grad_norm": 0.8359710386518069,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 6080
+    },
+    {
+      "epoch": 0.06081,
+      "grad_norm": 1.0786121941538231,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 6081
+    },
+    {
+      "epoch": 0.06082,
+      "grad_norm": 1.1823183295621211,
+      "learning_rate": 0.003,
+      "loss": 4.0945,
+      "step": 6082
+    },
+    {
+      "epoch": 0.06083,
+      "grad_norm": 0.8837771871886293,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 6083
+    },
+    {
+      "epoch": 0.06084,
+      "grad_norm": 0.7204472009727627,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 6084
+    },
+    {
+      "epoch": 0.06085,
+      "grad_norm": 0.7103714572162915,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 6085
+    },
+    {
+      "epoch": 0.06086,
+      "grad_norm": 0.811997878307769,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 6086
+    },
+    {
+      "epoch": 0.06087,
+      "grad_norm": 0.8073780584157964,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 6087
+    },
+    {
+      "epoch": 0.06088,
+      "grad_norm": 0.8544463123489077,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 6088
+    },
+    {
+      "epoch": 0.06089,
+      "grad_norm": 0.9762980822164021,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 6089
+    },
+    {
+      "epoch": 0.0609,
+      "grad_norm": 1.2142304992327353,
+      "learning_rate": 0.003,
+      "loss": 4.0934,
+      "step": 6090
+    },
+    {
+      "epoch": 0.06091,
+      "grad_norm": 1.082695490566579,
+      "learning_rate": 0.003,
+      "loss": 4.0638,
+      "step": 6091
+    },
+    {
+      "epoch": 0.06092,
+      "grad_norm": 0.9278093493414128,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 6092
+    },
+    {
+      "epoch": 0.06093,
+      "grad_norm": 0.8690782403614105,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 6093
+    },
+    {
+      "epoch": 0.06094,
+      "grad_norm": 0.8570778358605303,
+      "learning_rate": 0.003,
+      "loss": 4.0582,
+      "step": 6094
+    },
+    {
+      "epoch": 0.06095,
+      "grad_norm": 0.8138189413941755,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 6095
+    },
+    {
+      "epoch": 0.06096,
+      "grad_norm": 0.9634421284092561,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 6096
+    },
+    {
+      "epoch": 0.06097,
+      "grad_norm": 1.0632007099690335,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 6097
+    },
+    {
+      "epoch": 0.06098,
+      "grad_norm": 1.0871230047693712,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 6098
+    },
+    {
+      "epoch": 0.06099,
+      "grad_norm": 1.0812665303937632,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 6099
+    },
+    {
+      "epoch": 0.061,
+      "grad_norm": 0.9904572164775792,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 6100
+    },
+    {
+      "epoch": 0.06101,
+      "grad_norm": 1.2471241797218284,
+      "learning_rate": 0.003,
+      "loss": 4.0956,
+      "step": 6101
+    },
+    {
+      "epoch": 0.06102,
+      "grad_norm": 0.9331918176411176,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 6102
+    },
+    {
+      "epoch": 0.06103,
+      "grad_norm": 1.0358350532613663,
+      "learning_rate": 0.003,
+      "loss": 4.0846,
+      "step": 6103
+    },
+    {
+      "epoch": 0.06104,
+      "grad_norm": 0.9863138814409834,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 6104
+    },
+    {
+      "epoch": 0.06105,
+      "grad_norm": 1.2154265908459023,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 6105
+    },
+    {
+      "epoch": 0.06106,
+      "grad_norm": 0.8917304789437333,
+      "learning_rate": 0.003,
+      "loss": 4.0784,
+      "step": 6106
+    },
+    {
+      "epoch": 0.06107,
+      "grad_norm": 0.9427677486805567,
+      "learning_rate": 0.003,
+      "loss": 4.0905,
+      "step": 6107
+    },
+    {
+      "epoch": 0.06108,
+      "grad_norm": 0.9771691970851738,
+      "learning_rate": 0.003,
+      "loss": 4.0566,
+      "step": 6108
+    },
+    {
+      "epoch": 0.06109,
+      "grad_norm": 1.0200829167924297,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 6109
+    },
+    {
+      "epoch": 0.0611,
+      "grad_norm": 1.0829855702592046,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 6110
+    },
+    {
+      "epoch": 0.06111,
+      "grad_norm": 1.11913249179738,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 6111
+    },
+    {
+      "epoch": 0.06112,
+      "grad_norm": 0.9956612041062181,
+      "learning_rate": 0.003,
+      "loss": 4.0665,
+      "step": 6112
+    },
+    {
+      "epoch": 0.06113,
+      "grad_norm": 1.1071104171417576,
+      "learning_rate": 0.003,
+      "loss": 4.0805,
+      "step": 6113
+    },
+    {
+      "epoch": 0.06114,
+      "grad_norm": 0.9095207699385705,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 6114
+    },
+    {
+      "epoch": 0.06115,
+      "grad_norm": 1.044965318613425,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 6115
+    },
+    {
+      "epoch": 0.06116,
+      "grad_norm": 1.2061603685848585,
+      "learning_rate": 0.003,
+      "loss": 4.0634,
+      "step": 6116
+    },
+    {
+      "epoch": 0.06117,
+      "grad_norm": 0.9331914752531237,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 6117
+    },
+    {
+      "epoch": 0.06118,
+      "grad_norm": 0.9125472343562069,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 6118
+    },
+    {
+      "epoch": 0.06119,
+      "grad_norm": 0.7893638895463538,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 6119
+    },
+    {
+      "epoch": 0.0612,
+      "grad_norm": 0.9244864031831111,
+      "learning_rate": 0.003,
+      "loss": 4.0676,
+      "step": 6120
+    },
+    {
+      "epoch": 0.06121,
+      "grad_norm": 0.9033631978733891,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 6121
+    },
+    {
+      "epoch": 0.06122,
+      "grad_norm": 1.0308394729354522,
+      "learning_rate": 0.003,
+      "loss": 4.072,
+      "step": 6122
+    },
+    {
+      "epoch": 0.06123,
+      "grad_norm": 0.9549347100846058,
+      "learning_rate": 0.003,
+      "loss": 4.0668,
+      "step": 6123
+    },
+    {
+      "epoch": 0.06124,
+      "grad_norm": 1.018979699685598,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 6124
+    },
+    {
+      "epoch": 0.06125,
+      "grad_norm": 1.0396926470286123,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 6125
+    },
+    {
+      "epoch": 0.06126,
+      "grad_norm": 0.970338770093608,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 6126
+    },
+    {
+      "epoch": 0.06127,
+      "grad_norm": 0.9481470691411102,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 6127
+    },
+    {
+      "epoch": 0.06128,
+      "grad_norm": 1.0848389247188306,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 6128
+    },
+    {
+      "epoch": 0.06129,
+      "grad_norm": 1.0580408480527546,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 6129
+    },
+    {
+      "epoch": 0.0613,
+      "grad_norm": 0.9682253621336249,
+      "learning_rate": 0.003,
+      "loss": 4.0703,
+      "step": 6130
+    },
+    {
+      "epoch": 0.06131,
+      "grad_norm": 0.9210655524658186,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 6131
+    },
+    {
+      "epoch": 0.06132,
+      "grad_norm": 0.9155041280961395,
+      "learning_rate": 0.003,
+      "loss": 4.0639,
+      "step": 6132
+    },
+    {
+      "epoch": 0.06133,
+      "grad_norm": 0.9789482932440452,
+      "learning_rate": 0.003,
+      "loss": 4.0567,
+      "step": 6133
+    },
+    {
+      "epoch": 0.06134,
+      "grad_norm": 1.1135362136558498,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 6134
+    },
+    {
+      "epoch": 0.06135,
+      "grad_norm": 1.0670150504515052,
+      "learning_rate": 0.003,
+      "loss": 4.0778,
+      "step": 6135
+    },
+    {
+      "epoch": 0.06136,
+      "grad_norm": 1.009306468929253,
+      "learning_rate": 0.003,
+      "loss": 4.0579,
+      "step": 6136
+    },
+    {
+      "epoch": 0.06137,
+      "grad_norm": 1.1199332436482432,
+      "learning_rate": 0.003,
+      "loss": 4.0599,
+      "step": 6137
+    },
+    {
+      "epoch": 0.06138,
+      "grad_norm": 0.9822960858920264,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 6138
+    },
+    {
+      "epoch": 0.06139,
+      "grad_norm": 1.061123881008166,
+      "learning_rate": 0.003,
+      "loss": 4.0757,
+      "step": 6139
+    },
+    {
+      "epoch": 0.0614,
+      "grad_norm": 1.1067582756640688,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 6140
+    },
+    {
+      "epoch": 0.06141,
+      "grad_norm": 1.1197623616073298,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 6141
+    },
+    {
+      "epoch": 0.06142,
+      "grad_norm": 0.9621463377982679,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 6142
+    },
+    {
+      "epoch": 0.06143,
+      "grad_norm": 0.9823524748170486,
+      "learning_rate": 0.003,
+      "loss": 4.068,
+      "step": 6143
+    },
+    {
+      "epoch": 0.06144,
+      "grad_norm": 1.1256975016048572,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 6144
+    },
+    {
+      "epoch": 0.06145,
+      "grad_norm": 1.0964390916024676,
+      "learning_rate": 0.003,
+      "loss": 4.0708,
+      "step": 6145
+    },
+    {
+      "epoch": 0.06146,
+      "grad_norm": 0.9340305536724448,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 6146
+    },
+    {
+      "epoch": 0.06147,
+      "grad_norm": 0.9639235550819623,
+      "learning_rate": 0.003,
+      "loss": 4.0905,
+      "step": 6147
+    },
+    {
+      "epoch": 0.06148,
+      "grad_norm": 1.1141344848316928,
+      "learning_rate": 0.003,
+      "loss": 4.0649,
+      "step": 6148
+    },
+    {
+      "epoch": 0.06149,
+      "grad_norm": 1.0008166080107452,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 6149
+    },
+    {
+      "epoch": 0.0615,
+      "grad_norm": 1.291300378500113,
+      "learning_rate": 0.003,
+      "loss": 4.077,
+      "step": 6150
+    },
+    {
+      "epoch": 0.06151,
+      "grad_norm": 0.9179128052204282,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 6151
+    },
+    {
+      "epoch": 0.06152,
+      "grad_norm": 0.83811806335343,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 6152
+    },
+    {
+      "epoch": 0.06153,
+      "grad_norm": 1.0325588836325448,
+      "learning_rate": 0.003,
+      "loss": 4.0784,
+      "step": 6153
+    },
+    {
+      "epoch": 0.06154,
+      "grad_norm": 1.0531062692602984,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 6154
+    },
+    {
+      "epoch": 0.06155,
+      "grad_norm": 0.9258868598309281,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 6155
+    },
+    {
+      "epoch": 0.06156,
+      "grad_norm": 0.8537558991823246,
+      "learning_rate": 0.003,
+      "loss": 4.0514,
+      "step": 6156
+    },
+    {
+      "epoch": 0.06157,
+      "grad_norm": 0.915762781550664,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 6157
+    },
+    {
+      "epoch": 0.06158,
+      "grad_norm": 0.8548951465188689,
+      "learning_rate": 0.003,
+      "loss": 4.0914,
+      "step": 6158
+    },
+    {
+      "epoch": 0.06159,
+      "grad_norm": 0.8765672907860784,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 6159
+    },
+    {
+      "epoch": 0.0616,
+      "grad_norm": 0.9374358381023599,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 6160
+    },
+    {
+      "epoch": 0.06161,
+      "grad_norm": 1.1299972768267692,
+      "learning_rate": 0.003,
+      "loss": 4.0791,
+      "step": 6161
+    },
+    {
+      "epoch": 0.06162,
+      "grad_norm": 0.9317143658408135,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 6162
+    },
+    {
+      "epoch": 0.06163,
+      "grad_norm": 0.8061151783232723,
+      "learning_rate": 0.003,
+      "loss": 4.0883,
+      "step": 6163
+    },
+    {
+      "epoch": 0.06164,
+      "grad_norm": 0.7615643215210022,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 6164
+    },
+    {
+      "epoch": 0.06165,
+      "grad_norm": 0.7769881554996289,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 6165
+    },
+    {
+      "epoch": 0.06166,
+      "grad_norm": 0.8085532275590331,
+      "learning_rate": 0.003,
+      "loss": 4.0754,
+      "step": 6166
+    },
+    {
+      "epoch": 0.06167,
+      "grad_norm": 0.855796871730468,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 6167
+    },
+    {
+      "epoch": 0.06168,
+      "grad_norm": 1.138731517223366,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 6168
+    },
+    {
+      "epoch": 0.06169,
+      "grad_norm": 1.368396718368792,
+      "learning_rate": 0.003,
+      "loss": 4.068,
+      "step": 6169
+    },
+    {
+      "epoch": 0.0617,
+      "grad_norm": 0.7455936313181897,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 6170
+    },
+    {
+      "epoch": 0.06171,
+      "grad_norm": 0.7443727028390393,
+      "learning_rate": 0.003,
+      "loss": 4.0588,
+      "step": 6171
+    },
+    {
+      "epoch": 0.06172,
+      "grad_norm": 0.9360131411969993,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 6172
+    },
+    {
+      "epoch": 0.06173,
+      "grad_norm": 1.0491839760469293,
+      "learning_rate": 0.003,
+      "loss": 4.0727,
+      "step": 6173
+    },
+    {
+      "epoch": 0.06174,
+      "grad_norm": 0.9907442372695112,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 6174
+    },
+    {
+      "epoch": 0.06175,
+      "grad_norm": 1.0447539234835719,
+      "learning_rate": 0.003,
+      "loss": 4.0655,
+      "step": 6175
+    },
+    {
+      "epoch": 0.06176,
+      "grad_norm": 1.010073314902336,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 6176
+    },
+    {
+      "epoch": 0.06177,
+      "grad_norm": 1.1559422707750275,
+      "learning_rate": 0.003,
+      "loss": 4.0625,
+      "step": 6177
+    },
+    {
+      "epoch": 0.06178,
+      "grad_norm": 0.9526053774670727,
+      "learning_rate": 0.003,
+      "loss": 4.0514,
+      "step": 6178
+    },
+    {
+      "epoch": 0.06179,
+      "grad_norm": 1.022987693024061,
+      "learning_rate": 0.003,
+      "loss": 4.0685,
+      "step": 6179
+    },
+    {
+      "epoch": 0.0618,
+      "grad_norm": 1.0210445225525222,
+      "learning_rate": 0.003,
+      "loss": 4.0649,
+      "step": 6180
+    },
+    {
+      "epoch": 0.06181,
+      "grad_norm": 0.9583722005427316,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 6181
+    },
+    {
+      "epoch": 0.06182,
+      "grad_norm": 1.0777328688245935,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 6182
+    },
+    {
+      "epoch": 0.06183,
+      "grad_norm": 1.1218720835460165,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 6183
+    },
+    {
+      "epoch": 0.06184,
+      "grad_norm": 0.8901736945496374,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 6184
+    },
+    {
+      "epoch": 0.06185,
+      "grad_norm": 0.7821970283133759,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 6185
+    },
+    {
+      "epoch": 0.06186,
+      "grad_norm": 0.8340351304034461,
+      "learning_rate": 0.003,
+      "loss": 4.0706,
+      "step": 6186
+    },
+    {
+      "epoch": 0.06187,
+      "grad_norm": 0.826969525348011,
+      "learning_rate": 0.003,
+      "loss": 4.0755,
+      "step": 6187
+    },
+    {
+      "epoch": 0.06188,
+      "grad_norm": 0.8321316542169037,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 6188
+    },
+    {
+      "epoch": 0.06189,
+      "grad_norm": 1.0085697971600716,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 6189
+    },
+    {
+      "epoch": 0.0619,
+      "grad_norm": 1.2088449819616511,
+      "learning_rate": 0.003,
+      "loss": 4.098,
+      "step": 6190
+    },
+    {
+      "epoch": 0.06191,
+      "grad_norm": 1.052029182606652,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 6191
+    },
+    {
+      "epoch": 0.06192,
+      "grad_norm": 0.9984353604886477,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 6192
+    },
+    {
+      "epoch": 0.06193,
+      "grad_norm": 1.2795032941383804,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 6193
+    },
+    {
+      "epoch": 0.06194,
+      "grad_norm": 0.7998102276672221,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 6194
+    },
+    {
+      "epoch": 0.06195,
+      "grad_norm": 0.7435037294270525,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 6195
+    },
+    {
+      "epoch": 0.06196,
+      "grad_norm": 0.9240469454378734,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 6196
+    },
+    {
+      "epoch": 0.06197,
+      "grad_norm": 1.1696222467202784,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 6197
+    },
+    {
+      "epoch": 0.06198,
+      "grad_norm": 0.9340202086200383,
+      "learning_rate": 0.003,
+      "loss": 4.0852,
+      "step": 6198
+    },
+    {
+      "epoch": 0.06199,
+      "grad_norm": 0.8407582922074252,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 6199
+    },
+    {
+      "epoch": 0.062,
+      "grad_norm": 0.8797469108168021,
+      "learning_rate": 0.003,
+      "loss": 4.0781,
+      "step": 6200
+    },
+    {
+      "epoch": 0.06201,
+      "grad_norm": 0.9885503711260389,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 6201
+    },
+    {
+      "epoch": 0.06202,
+      "grad_norm": 1.033995787879555,
+      "learning_rate": 0.003,
+      "loss": 4.0951,
+      "step": 6202
+    },
+    {
+      "epoch": 0.06203,
+      "grad_norm": 0.9934884285362043,
+      "learning_rate": 0.003,
+      "loss": 4.0768,
+      "step": 6203
+    },
+    {
+      "epoch": 0.06204,
+      "grad_norm": 1.2510710857513951,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 6204
+    },
+    {
+      "epoch": 0.06205,
+      "grad_norm": 0.9427630454595206,
+      "learning_rate": 0.003,
+      "loss": 4.0766,
+      "step": 6205
+    },
+    {
+      "epoch": 0.06206,
+      "grad_norm": 0.9038925258511812,
+      "learning_rate": 0.003,
+      "loss": 4.0634,
+      "step": 6206
+    },
+    {
+      "epoch": 0.06207,
+      "grad_norm": 0.86210908819428,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 6207
+    },
+    {
+      "epoch": 0.06208,
+      "grad_norm": 0.8083534046824736,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 6208
+    },
+    {
+      "epoch": 0.06209,
+      "grad_norm": 0.906637525628039,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 6209
+    },
+    {
+      "epoch": 0.0621,
+      "grad_norm": 1.1049751485709722,
+      "learning_rate": 0.003,
+      "loss": 4.1017,
+      "step": 6210
+    },
+    {
+      "epoch": 0.06211,
+      "grad_norm": 0.8465215117222046,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 6211
+    },
+    {
+      "epoch": 0.06212,
+      "grad_norm": 0.926150960560891,
+      "learning_rate": 0.003,
+      "loss": 4.0666,
+      "step": 6212
+    },
+    {
+      "epoch": 0.06213,
+      "grad_norm": 1.1848974987642524,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 6213
+    },
+    {
+      "epoch": 0.06214,
+      "grad_norm": 0.9608260526041966,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 6214
+    },
+    {
+      "epoch": 0.06215,
+      "grad_norm": 1.0487901084020896,
+      "learning_rate": 0.003,
+      "loss": 4.0698,
+      "step": 6215
+    },
+    {
+      "epoch": 0.06216,
+      "grad_norm": 1.0157866198312488,
+      "learning_rate": 0.003,
+      "loss": 4.0986,
+      "step": 6216
+    },
+    {
+      "epoch": 0.06217,
+      "grad_norm": 1.1412151022720478,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 6217
+    },
+    {
+      "epoch": 0.06218,
+      "grad_norm": 1.006247649190211,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 6218
+    },
+    {
+      "epoch": 0.06219,
+      "grad_norm": 1.2294549857751111,
+      "learning_rate": 0.003,
+      "loss": 4.0871,
+      "step": 6219
+    },
+    {
+      "epoch": 0.0622,
+      "grad_norm": 0.995208064028826,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 6220
+    },
+    {
+      "epoch": 0.06221,
+      "grad_norm": 0.9162212484056992,
+      "learning_rate": 0.003,
+      "loss": 4.0763,
+      "step": 6221
+    },
+    {
+      "epoch": 0.06222,
+      "grad_norm": 0.8454292925737381,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 6222
+    },
+    {
+      "epoch": 0.06223,
+      "grad_norm": 0.8608341935906129,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 6223
+    },
+    {
+      "epoch": 0.06224,
+      "grad_norm": 1.0517375571967738,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 6224
+    },
+    {
+      "epoch": 0.06225,
+      "grad_norm": 1.167553303659172,
+      "learning_rate": 0.003,
+      "loss": 4.072,
+      "step": 6225
+    },
+    {
+      "epoch": 0.06226,
+      "grad_norm": 0.9413390695324375,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 6226
+    },
+    {
+      "epoch": 0.06227,
+      "grad_norm": 1.1275346673821272,
+      "learning_rate": 0.003,
+      "loss": 4.0645,
+      "step": 6227
+    },
+    {
+      "epoch": 0.06228,
+      "grad_norm": 1.071326538063058,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 6228
+    },
+    {
+      "epoch": 0.06229,
+      "grad_norm": 1.0923671993434136,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 6229
+    },
+    {
+      "epoch": 0.0623,
+      "grad_norm": 1.1044316515319128,
+      "learning_rate": 0.003,
+      "loss": 4.0685,
+      "step": 6230
+    },
+    {
+      "epoch": 0.06231,
+      "grad_norm": 1.1192386233956613,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 6231
+    },
+    {
+      "epoch": 0.06232,
+      "grad_norm": 1.0749004721163167,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 6232
+    },
+    {
+      "epoch": 0.06233,
+      "grad_norm": 1.136441483294293,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 6233
+    },
+    {
+      "epoch": 0.06234,
+      "grad_norm": 1.096293829074208,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 6234
+    },
+    {
+      "epoch": 0.06235,
+      "grad_norm": 0.9421513147723747,
+      "learning_rate": 0.003,
+      "loss": 4.0564,
+      "step": 6235
+    },
+    {
+      "epoch": 0.06236,
+      "grad_norm": 0.9920661776334326,
+      "learning_rate": 0.003,
+      "loss": 4.0702,
+      "step": 6236
+    },
+    {
+      "epoch": 0.06237,
+      "grad_norm": 1.009784196859223,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 6237
+    },
+    {
+      "epoch": 0.06238,
+      "grad_norm": 0.9870780579077241,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 6238
+    },
+    {
+      "epoch": 0.06239,
+      "grad_norm": 1.2624770661972455,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 6239
+    },
+    {
+      "epoch": 0.0624,
+      "grad_norm": 0.8345122687975308,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 6240
+    },
+    {
+      "epoch": 0.06241,
+      "grad_norm": 0.8767674013310682,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 6241
+    },
+    {
+      "epoch": 0.06242,
+      "grad_norm": 0.8694164086668165,
+      "learning_rate": 0.003,
+      "loss": 4.0588,
+      "step": 6242
+    },
+    {
+      "epoch": 0.06243,
+      "grad_norm": 0.8968698662271256,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 6243
+    },
+    {
+      "epoch": 0.06244,
+      "grad_norm": 1.0969155124189833,
+      "learning_rate": 0.003,
+      "loss": 4.0874,
+      "step": 6244
+    },
+    {
+      "epoch": 0.06245,
+      "grad_norm": 0.9267212634506801,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 6245
+    },
+    {
+      "epoch": 0.06246,
+      "grad_norm": 1.0070455660568638,
+      "learning_rate": 0.003,
+      "loss": 4.0765,
+      "step": 6246
+    },
+    {
+      "epoch": 0.06247,
+      "grad_norm": 1.1508646699867722,
+      "learning_rate": 0.003,
+      "loss": 4.0655,
+      "step": 6247
+    },
+    {
+      "epoch": 0.06248,
+      "grad_norm": 0.9383548414378287,
+      "learning_rate": 0.003,
+      "loss": 4.0617,
+      "step": 6248
+    },
+    {
+      "epoch": 0.06249,
+      "grad_norm": 0.8885352213986607,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 6249
+    },
+    {
+      "epoch": 0.0625,
+      "grad_norm": 0.8342972923726877,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 6250
+    },
+    {
+      "epoch": 0.06251,
+      "grad_norm": 0.9237428978300573,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 6251
+    },
+    {
+      "epoch": 0.06252,
+      "grad_norm": 1.084993649871814,
+      "learning_rate": 0.003,
+      "loss": 4.0707,
+      "step": 6252
+    },
+    {
+      "epoch": 0.06253,
+      "grad_norm": 1.0323949223042157,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 6253
+    },
+    {
+      "epoch": 0.06254,
+      "grad_norm": 0.971393898605744,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 6254
+    },
+    {
+      "epoch": 0.06255,
+      "grad_norm": 0.9706954283600536,
+      "learning_rate": 0.003,
+      "loss": 4.0673,
+      "step": 6255
+    },
+    {
+      "epoch": 0.06256,
+      "grad_norm": 0.9415015290908161,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 6256
+    },
+    {
+      "epoch": 0.06257,
+      "grad_norm": 0.8621578985318906,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 6257
+    },
+    {
+      "epoch": 0.06258,
+      "grad_norm": 0.9548440225051628,
+      "learning_rate": 0.003,
+      "loss": 4.0723,
+      "step": 6258
+    },
+    {
+      "epoch": 0.06259,
+      "grad_norm": 0.9263497464951767,
+      "learning_rate": 0.003,
+      "loss": 4.088,
+      "step": 6259
+    },
+    {
+      "epoch": 0.0626,
+      "grad_norm": 0.8908902137670806,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 6260
+    },
+    {
+      "epoch": 0.06261,
+      "grad_norm": 0.8327652914912796,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 6261
+    },
+    {
+      "epoch": 0.06262,
+      "grad_norm": 0.9538403925841564,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 6262
+    },
+    {
+      "epoch": 0.06263,
+      "grad_norm": 1.1599473881323492,
+      "learning_rate": 0.003,
+      "loss": 4.0877,
+      "step": 6263
+    },
+    {
+      "epoch": 0.06264,
+      "grad_norm": 1.0110607933412514,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 6264
+    },
+    {
+      "epoch": 0.06265,
+      "grad_norm": 1.0076482190602307,
+      "learning_rate": 0.003,
+      "loss": 4.0781,
+      "step": 6265
+    },
+    {
+      "epoch": 0.06266,
+      "grad_norm": 0.8737903635942322,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 6266
+    },
+    {
+      "epoch": 0.06267,
+      "grad_norm": 0.8712992228434959,
+      "learning_rate": 0.003,
+      "loss": 4.0625,
+      "step": 6267
+    },
+    {
+      "epoch": 0.06268,
+      "grad_norm": 1.017260911789287,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 6268
+    },
+    {
+      "epoch": 0.06269,
+      "grad_norm": 0.997967424239097,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 6269
+    },
+    {
+      "epoch": 0.0627,
+      "grad_norm": 1.0751801208147977,
+      "learning_rate": 0.003,
+      "loss": 4.0655,
+      "step": 6270
+    },
+    {
+      "epoch": 0.06271,
+      "grad_norm": 1.0371657284207103,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 6271
+    },
+    {
+      "epoch": 0.06272,
+      "grad_norm": 1.2803244076660563,
+      "learning_rate": 0.003,
+      "loss": 4.0965,
+      "step": 6272
+    },
+    {
+      "epoch": 0.06273,
+      "grad_norm": 0.9500833272967772,
+      "learning_rate": 0.003,
+      "loss": 4.0848,
+      "step": 6273
+    },
+    {
+      "epoch": 0.06274,
+      "grad_norm": 1.0958729001288292,
+      "learning_rate": 0.003,
+      "loss": 4.0689,
+      "step": 6274
+    },
+    {
+      "epoch": 0.06275,
+      "grad_norm": 1.141758768594646,
+      "learning_rate": 0.003,
+      "loss": 4.0612,
+      "step": 6275
+    },
+    {
+      "epoch": 0.06276,
+      "grad_norm": 0.9628716788683748,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 6276
+    },
+    {
+      "epoch": 0.06277,
+      "grad_norm": 1.0976060075916712,
+      "learning_rate": 0.003,
+      "loss": 4.0789,
+      "step": 6277
+    },
+    {
+      "epoch": 0.06278,
+      "grad_norm": 1.016066298115058,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 6278
+    },
+    {
+      "epoch": 0.06279,
+      "grad_norm": 0.8769401740306906,
+      "learning_rate": 0.003,
+      "loss": 4.0856,
+      "step": 6279
+    },
+    {
+      "epoch": 0.0628,
+      "grad_norm": 0.9170763425191215,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 6280
+    },
+    {
+      "epoch": 0.06281,
+      "grad_norm": 1.111920165775686,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 6281
+    },
+    {
+      "epoch": 0.06282,
+      "grad_norm": 1.0224143144159263,
+      "learning_rate": 0.003,
+      "loss": 4.0638,
+      "step": 6282
+    },
+    {
+      "epoch": 0.06283,
+      "grad_norm": 1.1015506226196767,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 6283
+    },
+    {
+      "epoch": 0.06284,
+      "grad_norm": 0.9844477787774102,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 6284
+    },
+    {
+      "epoch": 0.06285,
+      "grad_norm": 1.0020352933530818,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 6285
+    },
+    {
+      "epoch": 0.06286,
+      "grad_norm": 0.8631477379490902,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 6286
+    },
+    {
+      "epoch": 0.06287,
+      "grad_norm": 0.8888287717900007,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 6287
+    },
+    {
+      "epoch": 0.06288,
+      "grad_norm": 0.8183385522469628,
+      "learning_rate": 0.003,
+      "loss": 4.0729,
+      "step": 6288
+    },
+    {
+      "epoch": 0.06289,
+      "grad_norm": 0.7548987473458644,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 6289
+    },
+    {
+      "epoch": 0.0629,
+      "grad_norm": 0.839173523396433,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 6290
+    },
+    {
+      "epoch": 0.06291,
+      "grad_norm": 0.8617471545841462,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 6291
+    },
+    {
+      "epoch": 0.06292,
+      "grad_norm": 0.8638474112750509,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 6292
+    },
+    {
+      "epoch": 0.06293,
+      "grad_norm": 0.9541204019425185,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 6293
+    },
+    {
+      "epoch": 0.06294,
+      "grad_norm": 1.1723643539699384,
+      "learning_rate": 0.003,
+      "loss": 4.0692,
+      "step": 6294
+    },
+    {
+      "epoch": 0.06295,
+      "grad_norm": 1.103442251330168,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 6295
+    },
+    {
+      "epoch": 0.06296,
+      "grad_norm": 1.0703680632965407,
+      "learning_rate": 0.003,
+      "loss": 4.086,
+      "step": 6296
+    },
+    {
+      "epoch": 0.06297,
+      "grad_norm": 1.0274650320053247,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 6297
+    },
+    {
+      "epoch": 0.06298,
+      "grad_norm": 0.89154419061986,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 6298
+    },
+    {
+      "epoch": 0.06299,
+      "grad_norm": 0.9135944828728849,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 6299
+    },
+    {
+      "epoch": 0.063,
+      "grad_norm": 0.8267113445895482,
+      "learning_rate": 0.003,
+      "loss": 4.1012,
+      "step": 6300
+    },
+    {
+      "epoch": 0.06301,
+      "grad_norm": 1.0037936149372095,
+      "learning_rate": 0.003,
+      "loss": 4.0812,
+      "step": 6301
+    },
+    {
+      "epoch": 0.06302,
+      "grad_norm": 1.2860071610961534,
+      "learning_rate": 0.003,
+      "loss": 4.0776,
+      "step": 6302
+    },
+    {
+      "epoch": 0.06303,
+      "grad_norm": 1.0162453119057104,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 6303
+    },
+    {
+      "epoch": 0.06304,
+      "grad_norm": 1.0450143375967518,
+      "learning_rate": 0.003,
+      "loss": 4.0866,
+      "step": 6304
+    },
+    {
+      "epoch": 0.06305,
+      "grad_norm": 1.1401136900936193,
+      "learning_rate": 0.003,
+      "loss": 4.0966,
+      "step": 6305
+    },
+    {
+      "epoch": 0.06306,
+      "grad_norm": 1.0466179358101595,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 6306
+    },
+    {
+      "epoch": 0.06307,
+      "grad_norm": 1.1366527233292367,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 6307
+    },
+    {
+      "epoch": 0.06308,
+      "grad_norm": 0.8947523569057309,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 6308
+    },
+    {
+      "epoch": 0.06309,
+      "grad_norm": 0.9787036236305489,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 6309
+    },
+    {
+      "epoch": 0.0631,
+      "grad_norm": 1.1060093552214123,
+      "learning_rate": 0.003,
+      "loss": 4.0632,
+      "step": 6310
+    },
+    {
+      "epoch": 0.06311,
+      "grad_norm": 0.934221683083381,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 6311
+    },
+    {
+      "epoch": 0.06312,
+      "grad_norm": 1.0377549576803702,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 6312
+    },
+    {
+      "epoch": 0.06313,
+      "grad_norm": 0.9581325400036763,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 6313
+    },
+    {
+      "epoch": 0.06314,
+      "grad_norm": 1.1035319349717054,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 6314
+    },
+    {
+      "epoch": 0.06315,
+      "grad_norm": 0.9924113339745636,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 6315
+    },
+    {
+      "epoch": 0.06316,
+      "grad_norm": 1.0717645296978155,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 6316
+    },
+    {
+      "epoch": 0.06317,
+      "grad_norm": 1.0557719176935128,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 6317
+    },
+    {
+      "epoch": 0.06318,
+      "grad_norm": 1.151290740982202,
+      "learning_rate": 0.003,
+      "loss": 4.1051,
+      "step": 6318
+    },
+    {
+      "epoch": 0.06319,
+      "grad_norm": 1.1038168869223615,
+      "learning_rate": 0.003,
+      "loss": 4.1059,
+      "step": 6319
+    },
+    {
+      "epoch": 0.0632,
+      "grad_norm": 0.9945691257477356,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 6320
+    },
+    {
+      "epoch": 0.06321,
+      "grad_norm": 1.0211879444413088,
+      "learning_rate": 0.003,
+      "loss": 4.0764,
+      "step": 6321
+    },
+    {
+      "epoch": 0.06322,
+      "grad_norm": 1.074648930120513,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 6322
+    },
+    {
+      "epoch": 0.06323,
+      "grad_norm": 1.0531931604114575,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 6323
+    },
+    {
+      "epoch": 0.06324,
+      "grad_norm": 1.0525740567574657,
+      "learning_rate": 0.003,
+      "loss": 4.0737,
+      "step": 6324
+    },
+    {
+      "epoch": 0.06325,
+      "grad_norm": 1.0156601469799102,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 6325
+    },
+    {
+      "epoch": 0.06326,
+      "grad_norm": 1.0885712514526227,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 6326
+    },
+    {
+      "epoch": 0.06327,
+      "grad_norm": 0.9064082305136328,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 6327
+    },
+    {
+      "epoch": 0.06328,
+      "grad_norm": 0.9294707681271361,
+      "learning_rate": 0.003,
+      "loss": 4.084,
+      "step": 6328
+    },
+    {
+      "epoch": 0.06329,
+      "grad_norm": 1.3371931683837754,
+      "learning_rate": 0.003,
+      "loss": 4.0765,
+      "step": 6329
+    },
+    {
+      "epoch": 0.0633,
+      "grad_norm": 0.8781010739013081,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 6330
+    },
+    {
+      "epoch": 0.06331,
+      "grad_norm": 0.8544193926722347,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 6331
+    },
+    {
+      "epoch": 0.06332,
+      "grad_norm": 0.8011724000742859,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 6332
+    },
+    {
+      "epoch": 0.06333,
+      "grad_norm": 0.7119981509484664,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 6333
+    },
+    {
+      "epoch": 0.06334,
+      "grad_norm": 0.6668806340124519,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 6334
+    },
+    {
+      "epoch": 0.06335,
+      "grad_norm": 0.8212002598201582,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 6335
+    },
+    {
+      "epoch": 0.06336,
+      "grad_norm": 0.8088000326714004,
+      "learning_rate": 0.003,
+      "loss": 4.0736,
+      "step": 6336
+    },
+    {
+      "epoch": 0.06337,
+      "grad_norm": 0.8536839468376971,
+      "learning_rate": 0.003,
+      "loss": 4.0773,
+      "step": 6337
+    },
+    {
+      "epoch": 0.06338,
+      "grad_norm": 1.1037572047297124,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 6338
+    },
+    {
+      "epoch": 0.06339,
+      "grad_norm": 1.3377815021844122,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 6339
+    },
+    {
+      "epoch": 0.0634,
+      "grad_norm": 0.6048765308025956,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 6340
+    },
+    {
+      "epoch": 0.06341,
+      "grad_norm": 0.7799193061695396,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 6341
+    },
+    {
+      "epoch": 0.06342,
+      "grad_norm": 1.1151041355238953,
+      "learning_rate": 0.003,
+      "loss": 4.0621,
+      "step": 6342
+    },
+    {
+      "epoch": 0.06343,
+      "grad_norm": 1.1674486948458083,
+      "learning_rate": 0.003,
+      "loss": 4.0746,
+      "step": 6343
+    },
+    {
+      "epoch": 0.06344,
+      "grad_norm": 1.031837089911801,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 6344
+    },
+    {
+      "epoch": 0.06345,
+      "grad_norm": 0.8615252116136146,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 6345
+    },
+    {
+      "epoch": 0.06346,
+      "grad_norm": 0.8103372667424406,
+      "learning_rate": 0.003,
+      "loss": 4.0604,
+      "step": 6346
+    },
+    {
+      "epoch": 0.06347,
+      "grad_norm": 0.8933549042384386,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 6347
+    },
+    {
+      "epoch": 0.06348,
+      "grad_norm": 0.9637829939992113,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 6348
+    },
+    {
+      "epoch": 0.06349,
+      "grad_norm": 0.9135048719984086,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 6349
+    },
+    {
+      "epoch": 0.0635,
+      "grad_norm": 1.1242591431500657,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 6350
+    },
+    {
+      "epoch": 0.06351,
+      "grad_norm": 0.9980317399940769,
+      "learning_rate": 0.003,
+      "loss": 4.0697,
+      "step": 6351
+    },
+    {
+      "epoch": 0.06352,
+      "grad_norm": 1.0451922994538014,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 6352
+    },
+    {
+      "epoch": 0.06353,
+      "grad_norm": 0.9845927560222498,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 6353
+    },
+    {
+      "epoch": 0.06354,
+      "grad_norm": 1.096403795880879,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 6354
+    },
+    {
+      "epoch": 0.06355,
+      "grad_norm": 0.9983219209770657,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 6355
+    },
+    {
+      "epoch": 0.06356,
+      "grad_norm": 1.187703733940572,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 6356
+    },
+    {
+      "epoch": 0.06357,
+      "grad_norm": 0.9615101296213491,
+      "learning_rate": 0.003,
+      "loss": 4.0776,
+      "step": 6357
+    },
+    {
+      "epoch": 0.06358,
+      "grad_norm": 1.1451194898172852,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 6358
+    },
+    {
+      "epoch": 0.06359,
+      "grad_norm": 1.0483116074347743,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 6359
+    },
+    {
+      "epoch": 0.0636,
+      "grad_norm": 0.9340547243285371,
+      "learning_rate": 0.003,
+      "loss": 4.075,
+      "step": 6360
+    },
+    {
+      "epoch": 0.06361,
+      "grad_norm": 0.8836803848175515,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 6361
+    },
+    {
+      "epoch": 0.06362,
+      "grad_norm": 0.8297069107111867,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 6362
+    },
+    {
+      "epoch": 0.06363,
+      "grad_norm": 0.9691564696742783,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 6363
+    },
+    {
+      "epoch": 0.06364,
+      "grad_norm": 1.0194286737320961,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 6364
+    },
+    {
+      "epoch": 0.06365,
+      "grad_norm": 1.0411408567959355,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 6365
+    },
+    {
+      "epoch": 0.06366,
+      "grad_norm": 0.9916919363829898,
+      "learning_rate": 0.003,
+      "loss": 4.0733,
+      "step": 6366
+    },
+    {
+      "epoch": 0.06367,
+      "grad_norm": 0.9560672684375412,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 6367
+    },
+    {
+      "epoch": 0.06368,
+      "grad_norm": 1.1782070353358838,
+      "learning_rate": 0.003,
+      "loss": 4.092,
+      "step": 6368
+    },
+    {
+      "epoch": 0.06369,
+      "grad_norm": 1.133684868527638,
+      "learning_rate": 0.003,
+      "loss": 4.099,
+      "step": 6369
+    },
+    {
+      "epoch": 0.0637,
+      "grad_norm": 0.8986796581542652,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 6370
+    },
+    {
+      "epoch": 0.06371,
+      "grad_norm": 0.798117831160487,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 6371
+    },
+    {
+      "epoch": 0.06372,
+      "grad_norm": 0.841679881049868,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 6372
+    },
+    {
+      "epoch": 0.06373,
+      "grad_norm": 0.912755996468821,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 6373
+    },
+    {
+      "epoch": 0.06374,
+      "grad_norm": 0.8902631081176816,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 6374
+    },
+    {
+      "epoch": 0.06375,
+      "grad_norm": 0.9176190979451236,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 6375
+    },
+    {
+      "epoch": 0.06376,
+      "grad_norm": 1.0740748411500862,
+      "learning_rate": 0.003,
+      "loss": 4.0668,
+      "step": 6376
+    },
+    {
+      "epoch": 0.06377,
+      "grad_norm": 1.1585312174412248,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 6377
+    },
+    {
+      "epoch": 0.06378,
+      "grad_norm": 1.0560699306688008,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 6378
+    },
+    {
+      "epoch": 0.06379,
+      "grad_norm": 1.2860487561573273,
+      "learning_rate": 0.003,
+      "loss": 4.0802,
+      "step": 6379
+    },
+    {
+      "epoch": 0.0638,
+      "grad_norm": 0.8690291787190746,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 6380
+    },
+    {
+      "epoch": 0.06381,
+      "grad_norm": 0.9436829595338491,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 6381
+    },
+    {
+      "epoch": 0.06382,
+      "grad_norm": 1.0685626630336664,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 6382
+    },
+    {
+      "epoch": 0.06383,
+      "grad_norm": 0.8979552535870967,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 6383
+    },
+    {
+      "epoch": 0.06384,
+      "grad_norm": 0.9809724782902665,
+      "learning_rate": 0.003,
+      "loss": 4.0712,
+      "step": 6384
+    },
+    {
+      "epoch": 0.06385,
+      "grad_norm": 0.9328427920083763,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 6385
+    },
+    {
+      "epoch": 0.06386,
+      "grad_norm": 0.8685113357756273,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 6386
+    },
+    {
+      "epoch": 0.06387,
+      "grad_norm": 0.9155556766963706,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 6387
+    },
+    {
+      "epoch": 0.06388,
+      "grad_norm": 0.9520115095678209,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 6388
+    },
+    {
+      "epoch": 0.06389,
+      "grad_norm": 0.9650777255338291,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 6389
+    },
+    {
+      "epoch": 0.0639,
+      "grad_norm": 0.9711913071461878,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 6390
+    },
+    {
+      "epoch": 0.06391,
+      "grad_norm": 1.0582916428545706,
+      "learning_rate": 0.003,
+      "loss": 4.0773,
+      "step": 6391
+    },
+    {
+      "epoch": 0.06392,
+      "grad_norm": 1.1228513540493317,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 6392
+    },
+    {
+      "epoch": 0.06393,
+      "grad_norm": 0.9824467176596088,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 6393
+    },
+    {
+      "epoch": 0.06394,
+      "grad_norm": 1.0748961169419036,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 6394
+    },
+    {
+      "epoch": 0.06395,
+      "grad_norm": 1.2122348207004991,
+      "learning_rate": 0.003,
+      "loss": 4.0914,
+      "step": 6395
+    },
+    {
+      "epoch": 0.06396,
+      "grad_norm": 0.9525125913234459,
+      "learning_rate": 0.003,
+      "loss": 4.095,
+      "step": 6396
+    },
+    {
+      "epoch": 0.06397,
+      "grad_norm": 1.1157249696277263,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 6397
+    },
+    {
+      "epoch": 0.06398,
+      "grad_norm": 1.0962231868724095,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 6398
+    },
+    {
+      "epoch": 0.06399,
+      "grad_norm": 0.7475394505526716,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 6399
+    },
+    {
+      "epoch": 0.064,
+      "grad_norm": 0.7790863228862557,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 6400
+    },
+    {
+      "epoch": 0.06401,
+      "grad_norm": 0.9202551013299898,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 6401
+    },
+    {
+      "epoch": 0.06402,
+      "grad_norm": 0.9324453718148813,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 6402
+    },
+    {
+      "epoch": 0.06403,
+      "grad_norm": 1.0178636443747968,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 6403
+    },
+    {
+      "epoch": 0.06404,
+      "grad_norm": 1.0889588831558945,
+      "learning_rate": 0.003,
+      "loss": 4.1007,
+      "step": 6404
+    },
+    {
+      "epoch": 0.06405,
+      "grad_norm": 1.0511307110083934,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 6405
+    },
+    {
+      "epoch": 0.06406,
+      "grad_norm": 0.8386442367077682,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 6406
+    },
+    {
+      "epoch": 0.06407,
+      "grad_norm": 0.9385442318393312,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 6407
+    },
+    {
+      "epoch": 0.06408,
+      "grad_norm": 1.1880536233711294,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 6408
+    },
+    {
+      "epoch": 0.06409,
+      "grad_norm": 1.0715566126733156,
+      "learning_rate": 0.003,
+      "loss": 4.0723,
+      "step": 6409
+    },
+    {
+      "epoch": 0.0641,
+      "grad_norm": 1.0226807648909895,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 6410
+    },
+    {
+      "epoch": 0.06411,
+      "grad_norm": 1.0144667419036346,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 6411
+    },
+    {
+      "epoch": 0.06412,
+      "grad_norm": 1.0953706363475204,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 6412
+    },
+    {
+      "epoch": 0.06413,
+      "grad_norm": 1.0397462803376565,
+      "learning_rate": 0.003,
+      "loss": 4.0847,
+      "step": 6413
+    },
+    {
+      "epoch": 0.06414,
+      "grad_norm": 0.9792075931298253,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 6414
+    },
+    {
+      "epoch": 0.06415,
+      "grad_norm": 1.0282547055284612,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 6415
+    },
+    {
+      "epoch": 0.06416,
+      "grad_norm": 1.159533810090572,
+      "learning_rate": 0.003,
+      "loss": 4.0805,
+      "step": 6416
+    },
+    {
+      "epoch": 0.06417,
+      "grad_norm": 1.1321519339699178,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 6417
+    },
+    {
+      "epoch": 0.06418,
+      "grad_norm": 0.9713020938212722,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 6418
+    },
+    {
+      "epoch": 0.06419,
+      "grad_norm": 1.2376733723226676,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 6419
+    },
+    {
+      "epoch": 0.0642,
+      "grad_norm": 0.9221140531392306,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 6420
+    },
+    {
+      "epoch": 0.06421,
+      "grad_norm": 0.9969702997793933,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 6421
+    },
+    {
+      "epoch": 0.06422,
+      "grad_norm": 1.0567219061372177,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 6422
+    },
+    {
+      "epoch": 0.06423,
+      "grad_norm": 0.991239045933805,
+      "learning_rate": 0.003,
+      "loss": 4.077,
+      "step": 6423
+    },
+    {
+      "epoch": 0.06424,
+      "grad_norm": 1.0308086601832684,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 6424
+    },
+    {
+      "epoch": 0.06425,
+      "grad_norm": 1.0347557359010413,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 6425
+    },
+    {
+      "epoch": 0.06426,
+      "grad_norm": 1.180251291539417,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 6426
+    },
+    {
+      "epoch": 0.06427,
+      "grad_norm": 1.0107602148306123,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 6427
+    },
+    {
+      "epoch": 0.06428,
+      "grad_norm": 1.0541298404940496,
+      "learning_rate": 0.003,
+      "loss": 4.0675,
+      "step": 6428
+    },
+    {
+      "epoch": 0.06429,
+      "grad_norm": 1.0253882188929047,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 6429
+    },
+    {
+      "epoch": 0.0643,
+      "grad_norm": 1.143653137235699,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 6430
+    },
+    {
+      "epoch": 0.06431,
+      "grad_norm": 0.9421049550422983,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 6431
+    },
+    {
+      "epoch": 0.06432,
+      "grad_norm": 1.1073171928356662,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 6432
+    },
+    {
+      "epoch": 0.06433,
+      "grad_norm": 1.0407008893715575,
+      "learning_rate": 0.003,
+      "loss": 4.0789,
+      "step": 6433
+    },
+    {
+      "epoch": 0.06434,
+      "grad_norm": 1.1395794148637106,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 6434
+    },
+    {
+      "epoch": 0.06435,
+      "grad_norm": 1.026318482136573,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 6435
+    },
+    {
+      "epoch": 0.06436,
+      "grad_norm": 1.1702677902712457,
+      "learning_rate": 0.003,
+      "loss": 4.0676,
+      "step": 6436
+    },
+    {
+      "epoch": 0.06437,
+      "grad_norm": 1.025273476921687,
+      "learning_rate": 0.003,
+      "loss": 4.0658,
+      "step": 6437
+    },
+    {
+      "epoch": 0.06438,
+      "grad_norm": 1.0119572577344156,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 6438
+    },
+    {
+      "epoch": 0.06439,
+      "grad_norm": 0.8412430606835657,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 6439
+    },
+    {
+      "epoch": 0.0644,
+      "grad_norm": 0.9507548790037242,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 6440
+    },
+    {
+      "epoch": 0.06441,
+      "grad_norm": 0.9244681652798047,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 6441
+    },
+    {
+      "epoch": 0.06442,
+      "grad_norm": 0.8807778875679908,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 6442
+    },
+    {
+      "epoch": 0.06443,
+      "grad_norm": 0.827270815956522,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 6443
+    },
+    {
+      "epoch": 0.06444,
+      "grad_norm": 0.803325818969944,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 6444
+    },
+    {
+      "epoch": 0.06445,
+      "grad_norm": 0.9385724441228014,
+      "learning_rate": 0.003,
+      "loss": 4.0606,
+      "step": 6445
+    },
+    {
+      "epoch": 0.06446,
+      "grad_norm": 1.2005144304193571,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 6446
+    },
+    {
+      "epoch": 0.06447,
+      "grad_norm": 0.9501919917514117,
+      "learning_rate": 0.003,
+      "loss": 4.0645,
+      "step": 6447
+    },
+    {
+      "epoch": 0.06448,
+      "grad_norm": 1.0236274372742054,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 6448
+    },
+    {
+      "epoch": 0.06449,
+      "grad_norm": 1.0925032591156019,
+      "learning_rate": 0.003,
+      "loss": 4.0986,
+      "step": 6449
+    },
+    {
+      "epoch": 0.0645,
+      "grad_norm": 0.9065417419150241,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 6450
+    },
+    {
+      "epoch": 0.06451,
+      "grad_norm": 0.8998787170517907,
+      "learning_rate": 0.003,
+      "loss": 4.087,
+      "step": 6451
+    },
+    {
+      "epoch": 0.06452,
+      "grad_norm": 0.9437442931525397,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 6452
+    },
+    {
+      "epoch": 0.06453,
+      "grad_norm": 1.0928923944168785,
+      "learning_rate": 0.003,
+      "loss": 4.0923,
+      "step": 6453
+    },
+    {
+      "epoch": 0.06454,
+      "grad_norm": 1.1495081243301128,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 6454
+    },
+    {
+      "epoch": 0.06455,
+      "grad_norm": 0.9528175723610092,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 6455
+    },
+    {
+      "epoch": 0.06456,
+      "grad_norm": 1.0833738182788575,
+      "learning_rate": 0.003,
+      "loss": 4.0747,
+      "step": 6456
+    },
+    {
+      "epoch": 0.06457,
+      "grad_norm": 1.1107357171312071,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 6457
+    },
+    {
+      "epoch": 0.06458,
+      "grad_norm": 1.1510295337322964,
+      "learning_rate": 0.003,
+      "loss": 4.068,
+      "step": 6458
+    },
+    {
+      "epoch": 0.06459,
+      "grad_norm": 1.0262159806305138,
+      "learning_rate": 0.003,
+      "loss": 4.0744,
+      "step": 6459
+    },
+    {
+      "epoch": 0.0646,
+      "grad_norm": 1.014576196236502,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 6460
+    },
+    {
+      "epoch": 0.06461,
+      "grad_norm": 1.064471879938064,
+      "learning_rate": 0.003,
+      "loss": 4.0942,
+      "step": 6461
+    },
+    {
+      "epoch": 0.06462,
+      "grad_norm": 0.9222498176393344,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 6462
+    },
+    {
+      "epoch": 0.06463,
+      "grad_norm": 0.8981156217551731,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 6463
+    },
+    {
+      "epoch": 0.06464,
+      "grad_norm": 1.1107391697536575,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 6464
+    },
+    {
+      "epoch": 0.06465,
+      "grad_norm": 1.0063336191833832,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 6465
+    },
+    {
+      "epoch": 0.06466,
+      "grad_norm": 1.1961559729990558,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 6466
+    },
+    {
+      "epoch": 0.06467,
+      "grad_norm": 0.8793978826576908,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 6467
+    },
+    {
+      "epoch": 0.06468,
+      "grad_norm": 1.0427733287660477,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 6468
+    },
+    {
+      "epoch": 0.06469,
+      "grad_norm": 1.1251364660455194,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 6469
+    },
+    {
+      "epoch": 0.0647,
+      "grad_norm": 1.0043010097247658,
+      "learning_rate": 0.003,
+      "loss": 4.0757,
+      "step": 6470
+    },
+    {
+      "epoch": 0.06471,
+      "grad_norm": 1.1207301616281806,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 6471
+    },
+    {
+      "epoch": 0.06472,
+      "grad_norm": 0.9991580557027551,
+      "learning_rate": 0.003,
+      "loss": 4.0676,
+      "step": 6472
+    },
+    {
+      "epoch": 0.06473,
+      "grad_norm": 1.1171158410309772,
+      "learning_rate": 0.003,
+      "loss": 4.0736,
+      "step": 6473
+    },
+    {
+      "epoch": 0.06474,
+      "grad_norm": 0.8529112213695773,
+      "learning_rate": 0.003,
+      "loss": 4.0862,
+      "step": 6474
+    },
+    {
+      "epoch": 0.06475,
+      "grad_norm": 0.8417759233289197,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 6475
+    },
+    {
+      "epoch": 0.06476,
+      "grad_norm": 0.9302953810525731,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 6476
+    },
+    {
+      "epoch": 0.06477,
+      "grad_norm": 1.0176820311771924,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 6477
+    },
+    {
+      "epoch": 0.06478,
+      "grad_norm": 1.1064098044871353,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 6478
+    },
+    {
+      "epoch": 0.06479,
+      "grad_norm": 1.0450987906387426,
+      "learning_rate": 0.003,
+      "loss": 4.0524,
+      "step": 6479
+    },
+    {
+      "epoch": 0.0648,
+      "grad_norm": 1.0076941555756993,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 6480
+    },
+    {
+      "epoch": 0.06481,
+      "grad_norm": 0.8795541683622259,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 6481
+    },
+    {
+      "epoch": 0.06482,
+      "grad_norm": 0.9850429100701444,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 6482
+    },
+    {
+      "epoch": 0.06483,
+      "grad_norm": 1.104728714549408,
+      "learning_rate": 0.003,
+      "loss": 4.0675,
+      "step": 6483
+    },
+    {
+      "epoch": 0.06484,
+      "grad_norm": 1.0625023539317728,
+      "learning_rate": 0.003,
+      "loss": 4.0617,
+      "step": 6484
+    },
+    {
+      "epoch": 0.06485,
+      "grad_norm": 0.9421033844694122,
+      "learning_rate": 0.003,
+      "loss": 4.0836,
+      "step": 6485
+    },
+    {
+      "epoch": 0.06486,
+      "grad_norm": 0.8988596473281786,
+      "learning_rate": 0.003,
+      "loss": 4.0808,
+      "step": 6486
+    },
+    {
+      "epoch": 0.06487,
+      "grad_norm": 0.963516647410238,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 6487
+    },
+    {
+      "epoch": 0.06488,
+      "grad_norm": 1.0333879473021825,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 6488
+    },
+    {
+      "epoch": 0.06489,
+      "grad_norm": 1.0586786599679208,
+      "learning_rate": 0.003,
+      "loss": 4.0635,
+      "step": 6489
+    },
+    {
+      "epoch": 0.0649,
+      "grad_norm": 0.9937287592814337,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 6490
+    },
+    {
+      "epoch": 0.06491,
+      "grad_norm": 1.1726633804612512,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 6491
+    },
+    {
+      "epoch": 0.06492,
+      "grad_norm": 1.0927920818435795,
+      "learning_rate": 0.003,
+      "loss": 4.075,
+      "step": 6492
+    },
+    {
+      "epoch": 0.06493,
+      "grad_norm": 1.0227562445548077,
+      "learning_rate": 0.003,
+      "loss": 4.0604,
+      "step": 6493
+    },
+    {
+      "epoch": 0.06494,
+      "grad_norm": 1.0831459762005358,
+      "learning_rate": 0.003,
+      "loss": 4.072,
+      "step": 6494
+    },
+    {
+      "epoch": 0.06495,
+      "grad_norm": 1.0262098572577179,
+      "learning_rate": 0.003,
+      "loss": 4.0624,
+      "step": 6495
+    },
+    {
+      "epoch": 0.06496,
+      "grad_norm": 1.2236916592367486,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 6496
+    },
+    {
+      "epoch": 0.06497,
+      "grad_norm": 1.0285867021198758,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 6497
+    },
+    {
+      "epoch": 0.06498,
+      "grad_norm": 1.1733459098787142,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 6498
+    },
+    {
+      "epoch": 0.06499,
+      "grad_norm": 1.0531898340962396,
+      "learning_rate": 0.003,
+      "loss": 4.0806,
+      "step": 6499
+    },
+    {
+      "epoch": 0.065,
+      "grad_norm": 1.060199193118051,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 6500
+    },
+    {
+      "epoch": 0.06501,
+      "grad_norm": 1.1527686386988991,
+      "learning_rate": 0.003,
+      "loss": 4.0544,
+      "step": 6501
+    },
+    {
+      "epoch": 0.06502,
+      "grad_norm": 1.029295109562913,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 6502
+    },
+    {
+      "epoch": 0.06503,
+      "grad_norm": 0.9836711412915636,
+      "learning_rate": 0.003,
+      "loss": 4.0756,
+      "step": 6503
+    },
+    {
+      "epoch": 0.06504,
+      "grad_norm": 0.9285828574317885,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 6504
+    },
+    {
+      "epoch": 0.06505,
+      "grad_norm": 0.975483354586866,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 6505
+    },
+    {
+      "epoch": 0.06506,
+      "grad_norm": 1.2115222037214375,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 6506
+    },
+    {
+      "epoch": 0.06507,
+      "grad_norm": 0.9656432384586594,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 6507
+    },
+    {
+      "epoch": 0.06508,
+      "grad_norm": 1.0366546849866198,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 6508
+    },
+    {
+      "epoch": 0.06509,
+      "grad_norm": 1.009479145061525,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 6509
+    },
+    {
+      "epoch": 0.0651,
+      "grad_norm": 1.0366020609012134,
+      "learning_rate": 0.003,
+      "loss": 4.0833,
+      "step": 6510
+    },
+    {
+      "epoch": 0.06511,
+      "grad_norm": 0.9709239930407754,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 6511
+    },
+    {
+      "epoch": 0.06512,
+      "grad_norm": 0.8644707502418649,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 6512
+    },
+    {
+      "epoch": 0.06513,
+      "grad_norm": 0.8946018112148616,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 6513
+    },
+    {
+      "epoch": 0.06514,
+      "grad_norm": 1.032708726685494,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 6514
+    },
+    {
+      "epoch": 0.06515,
+      "grad_norm": 1.0409138603347203,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 6515
+    },
+    {
+      "epoch": 0.06516,
+      "grad_norm": 0.9892609384071499,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 6516
+    },
+    {
+      "epoch": 0.06517,
+      "grad_norm": 0.9148700446008358,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 6517
+    },
+    {
+      "epoch": 0.06518,
+      "grad_norm": 0.9979358270843701,
+      "learning_rate": 0.003,
+      "loss": 4.0718,
+      "step": 6518
+    },
+    {
+      "epoch": 0.06519,
+      "grad_norm": 1.185271592002419,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 6519
+    },
+    {
+      "epoch": 0.0652,
+      "grad_norm": 0.9704305412861196,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 6520
+    },
+    {
+      "epoch": 0.06521,
+      "grad_norm": 1.1014185224929063,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 6521
+    },
+    {
+      "epoch": 0.06522,
+      "grad_norm": 1.0703081501278155,
+      "learning_rate": 0.003,
+      "loss": 4.0729,
+      "step": 6522
+    },
+    {
+      "epoch": 0.06523,
+      "grad_norm": 0.8771352485540366,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 6523
+    },
+    {
+      "epoch": 0.06524,
+      "grad_norm": 0.9130638198055455,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 6524
+    },
+    {
+      "epoch": 0.06525,
+      "grad_norm": 1.1326914793850837,
+      "learning_rate": 0.003,
+      "loss": 4.078,
+      "step": 6525
+    },
+    {
+      "epoch": 0.06526,
+      "grad_norm": 1.021254454818802,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 6526
+    },
+    {
+      "epoch": 0.06527,
+      "grad_norm": 1.073497230759006,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 6527
+    },
+    {
+      "epoch": 0.06528,
+      "grad_norm": 0.9631221418357153,
+      "learning_rate": 0.003,
+      "loss": 4.0684,
+      "step": 6528
+    },
+    {
+      "epoch": 0.06529,
+      "grad_norm": 0.8809083485174808,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 6529
+    },
+    {
+      "epoch": 0.0653,
+      "grad_norm": 0.9688255439544828,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 6530
+    },
+    {
+      "epoch": 0.06531,
+      "grad_norm": 0.9824939601057029,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 6531
+    },
+    {
+      "epoch": 0.06532,
+      "grad_norm": 0.9436605093416098,
+      "learning_rate": 0.003,
+      "loss": 4.0731,
+      "step": 6532
+    },
+    {
+      "epoch": 0.06533,
+      "grad_norm": 0.8767433235215197,
+      "learning_rate": 0.003,
+      "loss": 4.0679,
+      "step": 6533
+    },
+    {
+      "epoch": 0.06534,
+      "grad_norm": 1.0523478236740318,
+      "learning_rate": 0.003,
+      "loss": 4.0691,
+      "step": 6534
+    },
+    {
+      "epoch": 0.06535,
+      "grad_norm": 1.0000197595533407,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 6535
+    },
+    {
+      "epoch": 0.06536,
+      "grad_norm": 1.2508564779795237,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 6536
+    },
+    {
+      "epoch": 0.06537,
+      "grad_norm": 0.789891181606252,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 6537
+    },
+    {
+      "epoch": 0.06538,
+      "grad_norm": 0.7387596226051653,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 6538
+    },
+    {
+      "epoch": 0.06539,
+      "grad_norm": 0.6182415296014235,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 6539
+    },
+    {
+      "epoch": 0.0654,
+      "grad_norm": 0.6896044138716849,
+      "learning_rate": 0.003,
+      "loss": 4.0853,
+      "step": 6540
+    },
+    {
+      "epoch": 0.06541,
+      "grad_norm": 0.9475899668837449,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 6541
+    },
+    {
+      "epoch": 0.06542,
+      "grad_norm": 1.214498343082983,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 6542
+    },
+    {
+      "epoch": 0.06543,
+      "grad_norm": 0.8105078624490595,
+      "learning_rate": 0.003,
+      "loss": 4.0645,
+      "step": 6543
+    },
+    {
+      "epoch": 0.06544,
+      "grad_norm": 0.7169187786636381,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 6544
+    },
+    {
+      "epoch": 0.06545,
+      "grad_norm": 0.7645714804284779,
+      "learning_rate": 0.003,
+      "loss": 4.0612,
+      "step": 6545
+    },
+    {
+      "epoch": 0.06546,
+      "grad_norm": 0.7984074302017922,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 6546
+    },
+    {
+      "epoch": 0.06547,
+      "grad_norm": 0.898561711007871,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 6547
+    },
+    {
+      "epoch": 0.06548,
+      "grad_norm": 1.0356346515332355,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 6548
+    },
+    {
+      "epoch": 0.06549,
+      "grad_norm": 1.2065842171050336,
+      "learning_rate": 0.003,
+      "loss": 4.0803,
+      "step": 6549
+    },
+    {
+      "epoch": 0.0655,
+      "grad_norm": 0.9647919793019113,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 6550
+    },
+    {
+      "epoch": 0.06551,
+      "grad_norm": 0.9830823232743289,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 6551
+    },
+    {
+      "epoch": 0.06552,
+      "grad_norm": 1.145414200189479,
+      "learning_rate": 0.003,
+      "loss": 4.08,
+      "step": 6552
+    },
+    {
+      "epoch": 0.06553,
+      "grad_norm": 1.1282546842302874,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 6553
+    },
+    {
+      "epoch": 0.06554,
+      "grad_norm": 1.0724517832235836,
+      "learning_rate": 0.003,
+      "loss": 4.0633,
+      "step": 6554
+    },
+    {
+      "epoch": 0.06555,
+      "grad_norm": 0.9247118456778185,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 6555
+    },
+    {
+      "epoch": 0.06556,
+      "grad_norm": 0.8809168909392743,
+      "learning_rate": 0.003,
+      "loss": 4.0701,
+      "step": 6556
+    },
+    {
+      "epoch": 0.06557,
+      "grad_norm": 1.0027045686558371,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 6557
+    },
+    {
+      "epoch": 0.06558,
+      "grad_norm": 1.1521312374668213,
+      "learning_rate": 0.003,
+      "loss": 4.0846,
+      "step": 6558
+    },
+    {
+      "epoch": 0.06559,
+      "grad_norm": 0.9170120064963928,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 6559
+    },
+    {
+      "epoch": 0.0656,
+      "grad_norm": 0.8925486266426595,
+      "learning_rate": 0.003,
+      "loss": 4.0567,
+      "step": 6560
+    },
+    {
+      "epoch": 0.06561,
+      "grad_norm": 0.9709022873633115,
+      "learning_rate": 0.003,
+      "loss": 4.0775,
+      "step": 6561
+    },
+    {
+      "epoch": 0.06562,
+      "grad_norm": 1.0699148932589904,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 6562
+    },
+    {
+      "epoch": 0.06563,
+      "grad_norm": 1.018722166079644,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 6563
+    },
+    {
+      "epoch": 0.06564,
+      "grad_norm": 1.0104563015086112,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 6564
+    },
+    {
+      "epoch": 0.06565,
+      "grad_norm": 0.9932591703232511,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 6565
+    },
+    {
+      "epoch": 0.06566,
+      "grad_norm": 0.9531047599334086,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 6566
+    },
+    {
+      "epoch": 0.06567,
+      "grad_norm": 1.0533507364705403,
+      "learning_rate": 0.003,
+      "loss": 4.0771,
+      "step": 6567
+    },
+    {
+      "epoch": 0.06568,
+      "grad_norm": 1.2316178931504538,
+      "learning_rate": 0.003,
+      "loss": 4.0863,
+      "step": 6568
+    },
+    {
+      "epoch": 0.06569,
+      "grad_norm": 0.9719523712712863,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 6569
+    },
+    {
+      "epoch": 0.0657,
+      "grad_norm": 0.9929746213073647,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 6570
+    },
+    {
+      "epoch": 0.06571,
+      "grad_norm": 1.2226520601321686,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 6571
+    },
+    {
+      "epoch": 0.06572,
+      "grad_norm": 1.0093561514710232,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 6572
+    },
+    {
+      "epoch": 0.06573,
+      "grad_norm": 1.0716061347239403,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 6573
+    },
+    {
+      "epoch": 0.06574,
+      "grad_norm": 0.9809951043877417,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 6574
+    },
+    {
+      "epoch": 0.06575,
+      "grad_norm": 0.9974953620162613,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 6575
+    },
+    {
+      "epoch": 0.06576,
+      "grad_norm": 0.9179282772036231,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 6576
+    },
+    {
+      "epoch": 0.06577,
+      "grad_norm": 1.0424561456811468,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 6577
+    },
+    {
+      "epoch": 0.06578,
+      "grad_norm": 1.0920709972772817,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 6578
+    },
+    {
+      "epoch": 0.06579,
+      "grad_norm": 1.2580611117333567,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 6579
+    },
+    {
+      "epoch": 0.0658,
+      "grad_norm": 0.799965431122157,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 6580
+    },
+    {
+      "epoch": 0.06581,
+      "grad_norm": 0.781504567533229,
+      "learning_rate": 0.003,
+      "loss": 4.0676,
+      "step": 6581
+    },
+    {
+      "epoch": 0.06582,
+      "grad_norm": 0.8493771135199062,
+      "learning_rate": 0.003,
+      "loss": 4.0634,
+      "step": 6582
+    },
+    {
+      "epoch": 0.06583,
+      "grad_norm": 1.08382714627964,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 6583
+    },
+    {
+      "epoch": 0.06584,
+      "grad_norm": 0.9060976759116695,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 6584
+    },
+    {
+      "epoch": 0.06585,
+      "grad_norm": 1.0250567461266544,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 6585
+    },
+    {
+      "epoch": 0.06586,
+      "grad_norm": 1.2416572702479831,
+      "learning_rate": 0.003,
+      "loss": 4.069,
+      "step": 6586
+    },
+    {
+      "epoch": 0.06587,
+      "grad_norm": 0.8978047159555252,
+      "learning_rate": 0.003,
+      "loss": 4.0645,
+      "step": 6587
+    },
+    {
+      "epoch": 0.06588,
+      "grad_norm": 0.827033589453339,
+      "learning_rate": 0.003,
+      "loss": 4.0814,
+      "step": 6588
+    },
+    {
+      "epoch": 0.06589,
+      "grad_norm": 0.8211186750627676,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 6589
+    },
+    {
+      "epoch": 0.0659,
+      "grad_norm": 0.8442088987694915,
+      "learning_rate": 0.003,
+      "loss": 4.0763,
+      "step": 6590
+    },
+    {
+      "epoch": 0.06591,
+      "grad_norm": 1.0094167846920752,
+      "learning_rate": 0.003,
+      "loss": 4.1007,
+      "step": 6591
+    },
+    {
+      "epoch": 0.06592,
+      "grad_norm": 1.13531615910892,
+      "learning_rate": 0.003,
+      "loss": 4.0707,
+      "step": 6592
+    },
+    {
+      "epoch": 0.06593,
+      "grad_norm": 0.8982297119321998,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 6593
+    },
+    {
+      "epoch": 0.06594,
+      "grad_norm": 0.946141231020595,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 6594
+    },
+    {
+      "epoch": 0.06595,
+      "grad_norm": 0.9136182259899146,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 6595
+    },
+    {
+      "epoch": 0.06596,
+      "grad_norm": 0.8744151642325578,
+      "learning_rate": 0.003,
+      "loss": 4.0723,
+      "step": 6596
+    },
+    {
+      "epoch": 0.06597,
+      "grad_norm": 0.9533603618553095,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 6597
+    },
+    {
+      "epoch": 0.06598,
+      "grad_norm": 1.0604427104690746,
+      "learning_rate": 0.003,
+      "loss": 4.0864,
+      "step": 6598
+    },
+    {
+      "epoch": 0.06599,
+      "grad_norm": 1.067472601559575,
+      "learning_rate": 0.003,
+      "loss": 4.0692,
+      "step": 6599
+    },
+    {
+      "epoch": 0.066,
+      "grad_norm": 1.0838031919616027,
+      "learning_rate": 0.003,
+      "loss": 4.0752,
+      "step": 6600
+    },
+    {
+      "epoch": 0.06601,
+      "grad_norm": 1.0258280341899866,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 6601
+    },
+    {
+      "epoch": 0.06602,
+      "grad_norm": 1.0270369431845496,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 6602
+    },
+    {
+      "epoch": 0.06603,
+      "grad_norm": 1.0254464189303953,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 6603
+    },
+    {
+      "epoch": 0.06604,
+      "grad_norm": 1.0679326763588755,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 6604
+    },
+    {
+      "epoch": 0.06605,
+      "grad_norm": 0.9415976678420839,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 6605
+    },
+    {
+      "epoch": 0.06606,
+      "grad_norm": 0.9849735102421308,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 6606
+    },
+    {
+      "epoch": 0.06607,
+      "grad_norm": 1.1302001807963977,
+      "learning_rate": 0.003,
+      "loss": 4.0492,
+      "step": 6607
+    },
+    {
+      "epoch": 0.06608,
+      "grad_norm": 1.0851366334623165,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 6608
+    },
+    {
+      "epoch": 0.06609,
+      "grad_norm": 1.2878538978737437,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 6609
+    },
+    {
+      "epoch": 0.0661,
+      "grad_norm": 0.9809189178062531,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 6610
+    },
+    {
+      "epoch": 0.06611,
+      "grad_norm": 0.9774112014402608,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 6611
+    },
+    {
+      "epoch": 0.06612,
+      "grad_norm": 1.074206882798712,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 6612
+    },
+    {
+      "epoch": 0.06613,
+      "grad_norm": 0.9119362233622336,
+      "learning_rate": 0.003,
+      "loss": 4.0625,
+      "step": 6613
+    },
+    {
+      "epoch": 0.06614,
+      "grad_norm": 0.9092970755783418,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 6614
+    },
+    {
+      "epoch": 0.06615,
+      "grad_norm": 0.997529056172159,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 6615
+    },
+    {
+      "epoch": 0.06616,
+      "grad_norm": 1.0127096432931733,
+      "learning_rate": 0.003,
+      "loss": 4.0954,
+      "step": 6616
+    },
+    {
+      "epoch": 0.06617,
+      "grad_norm": 1.047029183095652,
+      "learning_rate": 0.003,
+      "loss": 4.0718,
+      "step": 6617
+    },
+    {
+      "epoch": 0.06618,
+      "grad_norm": 0.9374810538498864,
+      "learning_rate": 0.003,
+      "loss": 4.0809,
+      "step": 6618
+    },
+    {
+      "epoch": 0.06619,
+      "grad_norm": 0.9483864493653071,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 6619
+    },
+    {
+      "epoch": 0.0662,
+      "grad_norm": 1.0543118381581815,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 6620
+    },
+    {
+      "epoch": 0.06621,
+      "grad_norm": 0.9495585297143883,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 6621
+    },
+    {
+      "epoch": 0.06622,
+      "grad_norm": 1.1120232278985485,
+      "learning_rate": 0.003,
+      "loss": 4.0701,
+      "step": 6622
+    },
+    {
+      "epoch": 0.06623,
+      "grad_norm": 1.01560273953621,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 6623
+    },
+    {
+      "epoch": 0.06624,
+      "grad_norm": 1.1763569739613489,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 6624
+    },
+    {
+      "epoch": 0.06625,
+      "grad_norm": 1.1262447069862451,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 6625
+    },
+    {
+      "epoch": 0.06626,
+      "grad_norm": 0.9382970303663091,
+      "learning_rate": 0.003,
+      "loss": 4.0798,
+      "step": 6626
+    },
+    {
+      "epoch": 0.06627,
+      "grad_norm": 1.031419952696058,
+      "learning_rate": 0.003,
+      "loss": 4.0696,
+      "step": 6627
+    },
+    {
+      "epoch": 0.06628,
+      "grad_norm": 1.1000233296997506,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 6628
+    },
+    {
+      "epoch": 0.06629,
+      "grad_norm": 1.0494015088404511,
+      "learning_rate": 0.003,
+      "loss": 4.0658,
+      "step": 6629
+    },
+    {
+      "epoch": 0.0663,
+      "grad_norm": 0.9410676376451073,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 6630
+    },
+    {
+      "epoch": 0.06631,
+      "grad_norm": 1.0689157449200972,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 6631
+    },
+    {
+      "epoch": 0.06632,
+      "grad_norm": 0.9674796376390861,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 6632
+    },
+    {
+      "epoch": 0.06633,
+      "grad_norm": 1.0093941937371331,
+      "learning_rate": 0.003,
+      "loss": 4.0764,
+      "step": 6633
+    },
+    {
+      "epoch": 0.06634,
+      "grad_norm": 1.2337187993906655,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 6634
+    },
+    {
+      "epoch": 0.06635,
+      "grad_norm": 0.9607834095606316,
+      "learning_rate": 0.003,
+      "loss": 4.0848,
+      "step": 6635
+    },
+    {
+      "epoch": 0.06636,
+      "grad_norm": 0.9317793609051973,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 6636
+    },
+    {
+      "epoch": 0.06637,
+      "grad_norm": 0.9290660104219854,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 6637
+    },
+    {
+      "epoch": 0.06638,
+      "grad_norm": 0.8475879085595186,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 6638
+    },
+    {
+      "epoch": 0.06639,
+      "grad_norm": 0.7941589369777531,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 6639
+    },
+    {
+      "epoch": 0.0664,
+      "grad_norm": 0.8043535640617332,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 6640
+    },
+    {
+      "epoch": 0.06641,
+      "grad_norm": 0.9938652956113684,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 6641
+    },
+    {
+      "epoch": 0.06642,
+      "grad_norm": 1.1838239696711994,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 6642
+    },
+    {
+      "epoch": 0.06643,
+      "grad_norm": 0.8933675359063811,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 6643
+    },
+    {
+      "epoch": 0.06644,
+      "grad_norm": 0.7456469496808978,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 6644
+    },
+    {
+      "epoch": 0.06645,
+      "grad_norm": 0.7991018018930186,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 6645
+    },
+    {
+      "epoch": 0.06646,
+      "grad_norm": 0.9578935057690124,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 6646
+    },
+    {
+      "epoch": 0.06647,
+      "grad_norm": 1.0975543814373472,
+      "learning_rate": 0.003,
+      "loss": 4.0708,
+      "step": 6647
+    },
+    {
+      "epoch": 0.06648,
+      "grad_norm": 0.9324693403913995,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 6648
+    },
+    {
+      "epoch": 0.06649,
+      "grad_norm": 0.783828708420081,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 6649
+    },
+    {
+      "epoch": 0.0665,
+      "grad_norm": 0.8171820344699803,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 6650
+    },
+    {
+      "epoch": 0.06651,
+      "grad_norm": 1.0398397978025293,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 6651
+    },
+    {
+      "epoch": 0.06652,
+      "grad_norm": 1.1078725880727078,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 6652
+    },
+    {
+      "epoch": 0.06653,
+      "grad_norm": 1.05746120078045,
+      "learning_rate": 0.003,
+      "loss": 4.0734,
+      "step": 6653
+    },
+    {
+      "epoch": 0.06654,
+      "grad_norm": 1.1809835092262086,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 6654
+    },
+    {
+      "epoch": 0.06655,
+      "grad_norm": 0.9734402094947942,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 6655
+    },
+    {
+      "epoch": 0.06656,
+      "grad_norm": 1.1076276318277252,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 6656
+    },
+    {
+      "epoch": 0.06657,
+      "grad_norm": 0.977985457890705,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 6657
+    },
+    {
+      "epoch": 0.06658,
+      "grad_norm": 0.9396247788800478,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 6658
+    },
+    {
+      "epoch": 0.06659,
+      "grad_norm": 1.0889586998264722,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 6659
+    },
+    {
+      "epoch": 0.0666,
+      "grad_norm": 1.0252758829952033,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 6660
+    },
+    {
+      "epoch": 0.06661,
+      "grad_norm": 1.0468601933896104,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 6661
+    },
+    {
+      "epoch": 0.06662,
+      "grad_norm": 1.1000476753001847,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 6662
+    },
+    {
+      "epoch": 0.06663,
+      "grad_norm": 1.1085660118615401,
+      "learning_rate": 0.003,
+      "loss": 4.1162,
+      "step": 6663
+    },
+    {
+      "epoch": 0.06664,
+      "grad_norm": 0.9874276413847503,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 6664
+    },
+    {
+      "epoch": 0.06665,
+      "grad_norm": 1.0456032019182708,
+      "learning_rate": 0.003,
+      "loss": 4.1,
+      "step": 6665
+    },
+    {
+      "epoch": 0.06666,
+      "grad_norm": 1.0094947821010092,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 6666
+    },
+    {
+      "epoch": 0.06667,
+      "grad_norm": 1.109076869997086,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 6667
+    },
+    {
+      "epoch": 0.06668,
+      "grad_norm": 1.0088933647503096,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 6668
+    },
+    {
+      "epoch": 0.06669,
+      "grad_norm": 1.1968841584443815,
+      "learning_rate": 0.003,
+      "loss": 4.0625,
+      "step": 6669
+    },
+    {
+      "epoch": 0.0667,
+      "grad_norm": 1.1241320277325377,
+      "learning_rate": 0.003,
+      "loss": 4.0753,
+      "step": 6670
+    },
+    {
+      "epoch": 0.06671,
+      "grad_norm": 1.141904583401614,
+      "learning_rate": 0.003,
+      "loss": 4.0694,
+      "step": 6671
+    },
+    {
+      "epoch": 0.06672,
+      "grad_norm": 1.062629879867344,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 6672
+    },
+    {
+      "epoch": 0.06673,
+      "grad_norm": 0.905571170038888,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 6673
+    },
+    {
+      "epoch": 0.06674,
+      "grad_norm": 0.9626950086254233,
+      "learning_rate": 0.003,
+      "loss": 4.0725,
+      "step": 6674
+    },
+    {
+      "epoch": 0.06675,
+      "grad_norm": 1.0975398719716734,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 6675
+    },
+    {
+      "epoch": 0.06676,
+      "grad_norm": 1.3284543966412756,
+      "learning_rate": 0.003,
+      "loss": 4.0755,
+      "step": 6676
+    },
+    {
+      "epoch": 0.06677,
+      "grad_norm": 0.8172955955655188,
+      "learning_rate": 0.003,
+      "loss": 4.0737,
+      "step": 6677
+    },
+    {
+      "epoch": 0.06678,
+      "grad_norm": 0.9850016002008366,
+      "learning_rate": 0.003,
+      "loss": 4.0845,
+      "step": 6678
+    },
+    {
+      "epoch": 0.06679,
+      "grad_norm": 1.0932275223248946,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 6679
+    },
+    {
+      "epoch": 0.0668,
+      "grad_norm": 1.148365923927585,
+      "learning_rate": 0.003,
+      "loss": 4.0949,
+      "step": 6680
+    },
+    {
+      "epoch": 0.06681,
+      "grad_norm": 1.0338287959641415,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 6681
+    },
+    {
+      "epoch": 0.06682,
+      "grad_norm": 1.179731822743392,
+      "learning_rate": 0.003,
+      "loss": 4.0778,
+      "step": 6682
+    },
+    {
+      "epoch": 0.06683,
+      "grad_norm": 0.8843991533234951,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 6683
+    },
+    {
+      "epoch": 0.06684,
+      "grad_norm": 0.9108762252979913,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 6684
+    },
+    {
+      "epoch": 0.06685,
+      "grad_norm": 1.0436969205506577,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 6685
+    },
+    {
+      "epoch": 0.06686,
+      "grad_norm": 1.0267003835359743,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 6686
+    },
+    {
+      "epoch": 0.06687,
+      "grad_norm": 0.9909041191744213,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 6687
+    },
+    {
+      "epoch": 0.06688,
+      "grad_norm": 1.0186735696395426,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 6688
+    },
+    {
+      "epoch": 0.06689,
+      "grad_norm": 1.0837124330823527,
+      "learning_rate": 0.003,
+      "loss": 4.0853,
+      "step": 6689
+    },
+    {
+      "epoch": 0.0669,
+      "grad_norm": 0.9374724932078298,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 6690
+    },
+    {
+      "epoch": 0.06691,
+      "grad_norm": 0.9609019141278521,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 6691
+    },
+    {
+      "epoch": 0.06692,
+      "grad_norm": 0.9532183639646817,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 6692
+    },
+    {
+      "epoch": 0.06693,
+      "grad_norm": 0.9948461347977686,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 6693
+    },
+    {
+      "epoch": 0.06694,
+      "grad_norm": 0.927563382693705,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 6694
+    },
+    {
+      "epoch": 0.06695,
+      "grad_norm": 0.8983506006216038,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 6695
+    },
+    {
+      "epoch": 0.06696,
+      "grad_norm": 1.0195737792831439,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 6696
+    },
+    {
+      "epoch": 0.06697,
+      "grad_norm": 1.0461424467719074,
+      "learning_rate": 0.003,
+      "loss": 4.0638,
+      "step": 6697
+    },
+    {
+      "epoch": 0.06698,
+      "grad_norm": 1.1896402560053234,
+      "learning_rate": 0.003,
+      "loss": 4.0544,
+      "step": 6698
+    },
+    {
+      "epoch": 0.06699,
+      "grad_norm": 1.0331768787858662,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 6699
+    },
+    {
+      "epoch": 0.067,
+      "grad_norm": 1.0760979816453098,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 6700
+    },
+    {
+      "epoch": 0.06701,
+      "grad_norm": 0.9030506574904259,
+      "learning_rate": 0.003,
+      "loss": 4.0689,
+      "step": 6701
+    },
+    {
+      "epoch": 0.06702,
+      "grad_norm": 0.9590465759481133,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 6702
+    },
+    {
+      "epoch": 0.06703,
+      "grad_norm": 1.0377738190882686,
+      "learning_rate": 0.003,
+      "loss": 4.0731,
+      "step": 6703
+    },
+    {
+      "epoch": 0.06704,
+      "grad_norm": 0.9878474206212229,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 6704
+    },
+    {
+      "epoch": 0.06705,
+      "grad_norm": 1.266667575265577,
+      "learning_rate": 0.003,
+      "loss": 4.0564,
+      "step": 6705
+    },
+    {
+      "epoch": 0.06706,
+      "grad_norm": 0.8290481300791042,
+      "learning_rate": 0.003,
+      "loss": 4.0916,
+      "step": 6706
+    },
+    {
+      "epoch": 0.06707,
+      "grad_norm": 0.8267971520194893,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 6707
+    },
+    {
+      "epoch": 0.06708,
+      "grad_norm": 0.7961467186413875,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 6708
+    },
+    {
+      "epoch": 0.06709,
+      "grad_norm": 0.9824457399373988,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 6709
+    },
+    {
+      "epoch": 0.0671,
+      "grad_norm": 1.1961534967718657,
+      "learning_rate": 0.003,
+      "loss": 4.078,
+      "step": 6710
+    },
+    {
+      "epoch": 0.06711,
+      "grad_norm": 0.9265435402662816,
+      "learning_rate": 0.003,
+      "loss": 4.0779,
+      "step": 6711
+    },
+    {
+      "epoch": 0.06712,
+      "grad_norm": 1.0157357369850826,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 6712
+    },
+    {
+      "epoch": 0.06713,
+      "grad_norm": 0.9690324224443148,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 6713
+    },
+    {
+      "epoch": 0.06714,
+      "grad_norm": 0.897252797835452,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 6714
+    },
+    {
+      "epoch": 0.06715,
+      "grad_norm": 1.0170717220350938,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 6715
+    },
+    {
+      "epoch": 0.06716,
+      "grad_norm": 1.0531655759972556,
+      "learning_rate": 0.003,
+      "loss": 4.0599,
+      "step": 6716
+    },
+    {
+      "epoch": 0.06717,
+      "grad_norm": 1.083960596464857,
+      "learning_rate": 0.003,
+      "loss": 4.0828,
+      "step": 6717
+    },
+    {
+      "epoch": 0.06718,
+      "grad_norm": 0.9551481210518388,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 6718
+    },
+    {
+      "epoch": 0.06719,
+      "grad_norm": 0.935024318023197,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 6719
+    },
+    {
+      "epoch": 0.0672,
+      "grad_norm": 0.9498980025125131,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 6720
+    },
+    {
+      "epoch": 0.06721,
+      "grad_norm": 1.2015504874657223,
+      "learning_rate": 0.003,
+      "loss": 4.0779,
+      "step": 6721
+    },
+    {
+      "epoch": 0.06722,
+      "grad_norm": 0.9347001938513728,
+      "learning_rate": 0.003,
+      "loss": 4.0776,
+      "step": 6722
+    },
+    {
+      "epoch": 0.06723,
+      "grad_norm": 1.018756831416971,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 6723
+    },
+    {
+      "epoch": 0.06724,
+      "grad_norm": 1.0987676870246745,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 6724
+    },
+    {
+      "epoch": 0.06725,
+      "grad_norm": 1.0112178470596225,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 6725
+    },
+    {
+      "epoch": 0.06726,
+      "grad_norm": 1.112944901032879,
+      "learning_rate": 0.003,
+      "loss": 4.0933,
+      "step": 6726
+    },
+    {
+      "epoch": 0.06727,
+      "grad_norm": 1.163602379560122,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 6727
+    },
+    {
+      "epoch": 0.06728,
+      "grad_norm": 0.9360909552819368,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 6728
+    },
+    {
+      "epoch": 0.06729,
+      "grad_norm": 1.0605315281502332,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 6729
+    },
+    {
+      "epoch": 0.0673,
+      "grad_norm": 1.0349633126182776,
+      "learning_rate": 0.003,
+      "loss": 4.0591,
+      "step": 6730
+    },
+    {
+      "epoch": 0.06731,
+      "grad_norm": 1.2616197215123148,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 6731
+    },
+    {
+      "epoch": 0.06732,
+      "grad_norm": 1.0836805120817457,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 6732
+    },
+    {
+      "epoch": 0.06733,
+      "grad_norm": 0.9975329556874161,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 6733
+    },
+    {
+      "epoch": 0.06734,
+      "grad_norm": 1.0166554668806964,
+      "learning_rate": 0.003,
+      "loss": 4.063,
+      "step": 6734
+    },
+    {
+      "epoch": 0.06735,
+      "grad_norm": 0.9860686766932066,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 6735
+    },
+    {
+      "epoch": 0.06736,
+      "grad_norm": 1.1900474439099948,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 6736
+    },
+    {
+      "epoch": 0.06737,
+      "grad_norm": 0.8977382894817897,
+      "learning_rate": 0.003,
+      "loss": 4.0514,
+      "step": 6737
+    },
+    {
+      "epoch": 0.06738,
+      "grad_norm": 0.8986515344833599,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 6738
+    },
+    {
+      "epoch": 0.06739,
+      "grad_norm": 1.0321723301672268,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 6739
+    },
+    {
+      "epoch": 0.0674,
+      "grad_norm": 1.1818737667535166,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 6740
+    },
+    {
+      "epoch": 0.06741,
+      "grad_norm": 0.7306130026600984,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 6741
+    },
+    {
+      "epoch": 0.06742,
+      "grad_norm": 0.671364374797458,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 6742
+    },
+    {
+      "epoch": 0.06743,
+      "grad_norm": 0.7657736082400693,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 6743
+    },
+    {
+      "epoch": 0.06744,
+      "grad_norm": 0.8251136224218921,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 6744
+    },
+    {
+      "epoch": 0.06745,
+      "grad_norm": 0.7693016914924291,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 6745
+    },
+    {
+      "epoch": 0.06746,
+      "grad_norm": 0.8468866611491973,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 6746
+    },
+    {
+      "epoch": 0.06747,
+      "grad_norm": 1.0246297168064884,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 6747
+    },
+    {
+      "epoch": 0.06748,
+      "grad_norm": 1.1135872084646647,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 6748
+    },
+    {
+      "epoch": 0.06749,
+      "grad_norm": 0.8843107432077526,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 6749
+    },
+    {
+      "epoch": 0.0675,
+      "grad_norm": 0.9912073777138863,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 6750
+    },
+    {
+      "epoch": 0.06751,
+      "grad_norm": 1.2591178077510798,
+      "learning_rate": 0.003,
+      "loss": 4.08,
+      "step": 6751
+    },
+    {
+      "epoch": 0.06752,
+      "grad_norm": 0.8082384535393943,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 6752
+    },
+    {
+      "epoch": 0.06753,
+      "grad_norm": 0.7511358793452372,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 6753
+    },
+    {
+      "epoch": 0.06754,
+      "grad_norm": 0.867990155758509,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 6754
+    },
+    {
+      "epoch": 0.06755,
+      "grad_norm": 0.9955414763715997,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 6755
+    },
+    {
+      "epoch": 0.06756,
+      "grad_norm": 1.2073694787838065,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 6756
+    },
+    {
+      "epoch": 0.06757,
+      "grad_norm": 1.0707480255468453,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 6757
+    },
+    {
+      "epoch": 0.06758,
+      "grad_norm": 1.1122443531121051,
+      "learning_rate": 0.003,
+      "loss": 4.0549,
+      "step": 6758
+    },
+    {
+      "epoch": 0.06759,
+      "grad_norm": 0.9489611527342554,
+      "learning_rate": 0.003,
+      "loss": 4.0803,
+      "step": 6759
+    },
+    {
+      "epoch": 0.0676,
+      "grad_norm": 1.0041886074885658,
+      "learning_rate": 0.003,
+      "loss": 4.0671,
+      "step": 6760
+    },
+    {
+      "epoch": 0.06761,
+      "grad_norm": 1.355937161252965,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 6761
+    },
+    {
+      "epoch": 0.06762,
+      "grad_norm": 0.9894898503815498,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 6762
+    },
+    {
+      "epoch": 0.06763,
+      "grad_norm": 1.050921117069599,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 6763
+    },
+    {
+      "epoch": 0.06764,
+      "grad_norm": 1.067765228974404,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 6764
+    },
+    {
+      "epoch": 0.06765,
+      "grad_norm": 0.9545145035844784,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 6765
+    },
+    {
+      "epoch": 0.06766,
+      "grad_norm": 1.0023904128414394,
+      "learning_rate": 0.003,
+      "loss": 4.0698,
+      "step": 6766
+    },
+    {
+      "epoch": 0.06767,
+      "grad_norm": 0.9593720771665164,
+      "learning_rate": 0.003,
+      "loss": 4.0679,
+      "step": 6767
+    },
+    {
+      "epoch": 0.06768,
+      "grad_norm": 1.1273025407257062,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 6768
+    },
+    {
+      "epoch": 0.06769,
+      "grad_norm": 1.087476946360311,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 6769
+    },
+    {
+      "epoch": 0.0677,
+      "grad_norm": 1.1529163947763248,
+      "learning_rate": 0.003,
+      "loss": 4.0672,
+      "step": 6770
+    },
+    {
+      "epoch": 0.06771,
+      "grad_norm": 0.7314271498801576,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 6771
+    },
+    {
+      "epoch": 0.06772,
+      "grad_norm": 0.7723436133611571,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 6772
+    },
+    {
+      "epoch": 0.06773,
+      "grad_norm": 0.9678136783714796,
+      "learning_rate": 0.003,
+      "loss": 4.0706,
+      "step": 6773
+    },
+    {
+      "epoch": 0.06774,
+      "grad_norm": 1.2406355654915857,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 6774
+    },
+    {
+      "epoch": 0.06775,
+      "grad_norm": 0.8156597708290578,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 6775
+    },
+    {
+      "epoch": 0.06776,
+      "grad_norm": 0.9530712850122685,
+      "learning_rate": 0.003,
+      "loss": 4.0865,
+      "step": 6776
+    },
+    {
+      "epoch": 0.06777,
+      "grad_norm": 1.081652928525485,
+      "learning_rate": 0.003,
+      "loss": 4.0735,
+      "step": 6777
+    },
+    {
+      "epoch": 0.06778,
+      "grad_norm": 0.9997360227306185,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 6778
+    },
+    {
+      "epoch": 0.06779,
+      "grad_norm": 0.9050972292743207,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 6779
+    },
+    {
+      "epoch": 0.0678,
+      "grad_norm": 0.899616606188489,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 6780
+    },
+    {
+      "epoch": 0.06781,
+      "grad_norm": 1.035269328464622,
+      "learning_rate": 0.003,
+      "loss": 4.0934,
+      "step": 6781
+    },
+    {
+      "epoch": 0.06782,
+      "grad_norm": 1.204532202978388,
+      "learning_rate": 0.003,
+      "loss": 4.0741,
+      "step": 6782
+    },
+    {
+      "epoch": 0.06783,
+      "grad_norm": 0.9655678343763224,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 6783
+    },
+    {
+      "epoch": 0.06784,
+      "grad_norm": 0.900065330135723,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 6784
+    },
+    {
+      "epoch": 0.06785,
+      "grad_norm": 0.9737356124056987,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 6785
+    },
+    {
+      "epoch": 0.06786,
+      "grad_norm": 0.9974183872061985,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 6786
+    },
+    {
+      "epoch": 0.06787,
+      "grad_norm": 1.041494125314001,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 6787
+    },
+    {
+      "epoch": 0.06788,
+      "grad_norm": 1.083171903315331,
+      "learning_rate": 0.003,
+      "loss": 4.0753,
+      "step": 6788
+    },
+    {
+      "epoch": 0.06789,
+      "grad_norm": 1.3039936977749973,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 6789
+    },
+    {
+      "epoch": 0.0679,
+      "grad_norm": 0.9733172088754207,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 6790
+    },
+    {
+      "epoch": 0.06791,
+      "grad_norm": 0.8961119962612736,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 6791
+    },
+    {
+      "epoch": 0.06792,
+      "grad_norm": 0.9367283411485818,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 6792
+    },
+    {
+      "epoch": 0.06793,
+      "grad_norm": 1.2662871097017818,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 6793
+    },
+    {
+      "epoch": 0.06794,
+      "grad_norm": 0.9235411725065059,
+      "learning_rate": 0.003,
+      "loss": 4.0799,
+      "step": 6794
+    },
+    {
+      "epoch": 0.06795,
+      "grad_norm": 1.0564098546065013,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 6795
+    },
+    {
+      "epoch": 0.06796,
+      "grad_norm": 1.024685829459076,
+      "learning_rate": 0.003,
+      "loss": 4.0689,
+      "step": 6796
+    },
+    {
+      "epoch": 0.06797,
+      "grad_norm": 1.120134124815487,
+      "learning_rate": 0.003,
+      "loss": 4.0852,
+      "step": 6797
+    },
+    {
+      "epoch": 0.06798,
+      "grad_norm": 0.9517110352700969,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 6798
+    },
+    {
+      "epoch": 0.06799,
+      "grad_norm": 0.9692745939925019,
+      "learning_rate": 0.003,
+      "loss": 4.0861,
+      "step": 6799
+    },
+    {
+      "epoch": 0.068,
+      "grad_norm": 1.0264417600972477,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 6800
+    },
+    {
+      "epoch": 0.06801,
+      "grad_norm": 1.2302920860016315,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 6801
+    },
+    {
+      "epoch": 0.06802,
+      "grad_norm": 1.0369213141884206,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 6802
+    },
+    {
+      "epoch": 0.06803,
+      "grad_norm": 1.0524636853985736,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 6803
+    },
+    {
+      "epoch": 0.06804,
+      "grad_norm": 1.0387365140475044,
+      "learning_rate": 0.003,
+      "loss": 4.0736,
+      "step": 6804
+    },
+    {
+      "epoch": 0.06805,
+      "grad_norm": 1.2150483769149856,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 6805
+    },
+    {
+      "epoch": 0.06806,
+      "grad_norm": 0.8807324303080953,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 6806
+    },
+    {
+      "epoch": 0.06807,
+      "grad_norm": 0.8373757284517774,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 6807
+    },
+    {
+      "epoch": 0.06808,
+      "grad_norm": 0.8400667752675757,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 6808
+    },
+    {
+      "epoch": 0.06809,
+      "grad_norm": 0.8369993186398413,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 6809
+    },
+    {
+      "epoch": 0.0681,
+      "grad_norm": 0.9006507393196466,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 6810
+    },
+    {
+      "epoch": 0.06811,
+      "grad_norm": 1.0077180114145714,
+      "learning_rate": 0.003,
+      "loss": 4.0836,
+      "step": 6811
+    },
+    {
+      "epoch": 0.06812,
+      "grad_norm": 1.1521055645070202,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 6812
+    },
+    {
+      "epoch": 0.06813,
+      "grad_norm": 1.0625428176141143,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 6813
+    },
+    {
+      "epoch": 0.06814,
+      "grad_norm": 1.178996563449822,
+      "learning_rate": 0.003,
+      "loss": 4.0658,
+      "step": 6814
+    },
+    {
+      "epoch": 0.06815,
+      "grad_norm": 0.8797766928663225,
+      "learning_rate": 0.003,
+      "loss": 4.0709,
+      "step": 6815
+    },
+    {
+      "epoch": 0.06816,
+      "grad_norm": 0.8289246518934655,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 6816
+    },
+    {
+      "epoch": 0.06817,
+      "grad_norm": 0.8654988446799349,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 6817
+    },
+    {
+      "epoch": 0.06818,
+      "grad_norm": 1.0798387368738107,
+      "learning_rate": 0.003,
+      "loss": 4.0627,
+      "step": 6818
+    },
+    {
+      "epoch": 0.06819,
+      "grad_norm": 1.1105690600865603,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 6819
+    },
+    {
+      "epoch": 0.0682,
+      "grad_norm": 1.0813449551126753,
+      "learning_rate": 0.003,
+      "loss": 4.0843,
+      "step": 6820
+    },
+    {
+      "epoch": 0.06821,
+      "grad_norm": 0.96077795414745,
+      "learning_rate": 0.003,
+      "loss": 4.0757,
+      "step": 6821
+    },
+    {
+      "epoch": 0.06822,
+      "grad_norm": 0.9659380763604074,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 6822
+    },
+    {
+      "epoch": 0.06823,
+      "grad_norm": 1.10215696178397,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 6823
+    },
+    {
+      "epoch": 0.06824,
+      "grad_norm": 1.034364867848419,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 6824
+    },
+    {
+      "epoch": 0.06825,
+      "grad_norm": 0.9937444135494871,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 6825
+    },
+    {
+      "epoch": 0.06826,
+      "grad_norm": 0.9764377761142184,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 6826
+    },
+    {
+      "epoch": 0.06827,
+      "grad_norm": 0.969374372811426,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 6827
+    },
+    {
+      "epoch": 0.06828,
+      "grad_norm": 1.0350502895060627,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 6828
+    },
+    {
+      "epoch": 0.06829,
+      "grad_norm": 1.241961679572367,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 6829
+    },
+    {
+      "epoch": 0.0683,
+      "grad_norm": 1.025448801706437,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 6830
+    },
+    {
+      "epoch": 0.06831,
+      "grad_norm": 0.9950647336149414,
+      "learning_rate": 0.003,
+      "loss": 4.0784,
+      "step": 6831
+    },
+    {
+      "epoch": 0.06832,
+      "grad_norm": 1.1012052576359934,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 6832
+    },
+    {
+      "epoch": 0.06833,
+      "grad_norm": 0.8956721313879287,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 6833
+    },
+    {
+      "epoch": 0.06834,
+      "grad_norm": 1.1464044341999087,
+      "learning_rate": 0.003,
+      "loss": 4.0643,
+      "step": 6834
+    },
+    {
+      "epoch": 0.06835,
+      "grad_norm": 1.09845989400467,
+      "learning_rate": 0.003,
+      "loss": 4.0633,
+      "step": 6835
+    },
+    {
+      "epoch": 0.06836,
+      "grad_norm": 1.1577607159035546,
+      "learning_rate": 0.003,
+      "loss": 4.0883,
+      "step": 6836
+    },
+    {
+      "epoch": 0.06837,
+      "grad_norm": 0.9624906863376643,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 6837
+    },
+    {
+      "epoch": 0.06838,
+      "grad_norm": 1.0526594999167607,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 6838
+    },
+    {
+      "epoch": 0.06839,
+      "grad_norm": 1.0673019415333493,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 6839
+    },
+    {
+      "epoch": 0.0684,
+      "grad_norm": 1.012103935387283,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 6840
+    },
+    {
+      "epoch": 0.06841,
+      "grad_norm": 0.9147560220218148,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 6841
+    },
+    {
+      "epoch": 0.06842,
+      "grad_norm": 0.9643937321782413,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 6842
+    },
+    {
+      "epoch": 0.06843,
+      "grad_norm": 1.0039478407079416,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 6843
+    },
+    {
+      "epoch": 0.06844,
+      "grad_norm": 1.2025234350492864,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 6844
+    },
+    {
+      "epoch": 0.06845,
+      "grad_norm": 1.0176494517204697,
+      "learning_rate": 0.003,
+      "loss": 4.0685,
+      "step": 6845
+    },
+    {
+      "epoch": 0.06846,
+      "grad_norm": 0.9635201111106092,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 6846
+    },
+    {
+      "epoch": 0.06847,
+      "grad_norm": 0.9695541827730285,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 6847
+    },
+    {
+      "epoch": 0.06848,
+      "grad_norm": 1.055028109497596,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 6848
+    },
+    {
+      "epoch": 0.06849,
+      "grad_norm": 1.2623333403844699,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 6849
+    },
+    {
+      "epoch": 0.0685,
+      "grad_norm": 1.0979408883303405,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 6850
+    },
+    {
+      "epoch": 0.06851,
+      "grad_norm": 1.2299990074107543,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 6851
+    },
+    {
+      "epoch": 0.06852,
+      "grad_norm": 0.8622471399851679,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 6852
+    },
+    {
+      "epoch": 0.06853,
+      "grad_norm": 0.797053506469073,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 6853
+    },
+    {
+      "epoch": 0.06854,
+      "grad_norm": 0.7428713139049907,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 6854
+    },
+    {
+      "epoch": 0.06855,
+      "grad_norm": 0.8702478930012675,
+      "learning_rate": 0.003,
+      "loss": 4.0767,
+      "step": 6855
+    },
+    {
+      "epoch": 0.06856,
+      "grad_norm": 0.9565155211806974,
+      "learning_rate": 0.003,
+      "loss": 4.0766,
+      "step": 6856
+    },
+    {
+      "epoch": 0.06857,
+      "grad_norm": 0.9401346796927762,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 6857
+    },
+    {
+      "epoch": 0.06858,
+      "grad_norm": 1.046352232064668,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 6858
+    },
+    {
+      "epoch": 0.06859,
+      "grad_norm": 1.1113366890222538,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 6859
+    },
+    {
+      "epoch": 0.0686,
+      "grad_norm": 1.0332225044816656,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 6860
+    },
+    {
+      "epoch": 0.06861,
+      "grad_norm": 1.037641447920837,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 6861
+    },
+    {
+      "epoch": 0.06862,
+      "grad_norm": 1.0707793888395856,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 6862
+    },
+    {
+      "epoch": 0.06863,
+      "grad_norm": 1.3025261367467222,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 6863
+    },
+    {
+      "epoch": 0.06864,
+      "grad_norm": 0.7117943067157152,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 6864
+    },
+    {
+      "epoch": 0.06865,
+      "grad_norm": 0.8149012494620742,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 6865
+    },
+    {
+      "epoch": 0.06866,
+      "grad_norm": 0.9768992618939744,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 6866
+    },
+    {
+      "epoch": 0.06867,
+      "grad_norm": 1.2876703502703644,
+      "learning_rate": 0.003,
+      "loss": 4.0825,
+      "step": 6867
+    },
+    {
+      "epoch": 0.06868,
+      "grad_norm": 0.8939557694074772,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 6868
+    },
+    {
+      "epoch": 0.06869,
+      "grad_norm": 0.9739018190603045,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 6869
+    },
+    {
+      "epoch": 0.0687,
+      "grad_norm": 1.0808672350561943,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 6870
+    },
+    {
+      "epoch": 0.06871,
+      "grad_norm": 0.9732612594475262,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 6871
+    },
+    {
+      "epoch": 0.06872,
+      "grad_norm": 0.9231941574627566,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 6872
+    },
+    {
+      "epoch": 0.06873,
+      "grad_norm": 1.0038158255901826,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 6873
+    },
+    {
+      "epoch": 0.06874,
+      "grad_norm": 1.1994966137942642,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 6874
+    },
+    {
+      "epoch": 0.06875,
+      "grad_norm": 0.9012267255930811,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 6875
+    },
+    {
+      "epoch": 0.06876,
+      "grad_norm": 0.958415530478731,
+      "learning_rate": 0.003,
+      "loss": 4.0705,
+      "step": 6876
+    },
+    {
+      "epoch": 0.06877,
+      "grad_norm": 0.9667858571717155,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 6877
+    },
+    {
+      "epoch": 0.06878,
+      "grad_norm": 1.2429535644925198,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 6878
+    },
+    {
+      "epoch": 0.06879,
+      "grad_norm": 0.8678125841491381,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 6879
+    },
+    {
+      "epoch": 0.0688,
+      "grad_norm": 0.9483241231979963,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 6880
+    },
+    {
+      "epoch": 0.06881,
+      "grad_norm": 1.1081275778671038,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 6881
+    },
+    {
+      "epoch": 0.06882,
+      "grad_norm": 0.8941631616443152,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 6882
+    },
+    {
+      "epoch": 0.06883,
+      "grad_norm": 1.0008866943662982,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 6883
+    },
+    {
+      "epoch": 0.06884,
+      "grad_norm": 1.1763212407023795,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 6884
+    },
+    {
+      "epoch": 0.06885,
+      "grad_norm": 0.947660863840069,
+      "learning_rate": 0.003,
+      "loss": 4.0572,
+      "step": 6885
+    },
+    {
+      "epoch": 0.06886,
+      "grad_norm": 1.0445333316933652,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 6886
+    },
+    {
+      "epoch": 0.06887,
+      "grad_norm": 1.2206210118596559,
+      "learning_rate": 0.003,
+      "loss": 4.0683,
+      "step": 6887
+    },
+    {
+      "epoch": 0.06888,
+      "grad_norm": 1.0692166428843015,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 6888
+    },
+    {
+      "epoch": 0.06889,
+      "grad_norm": 0.9355380989508778,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 6889
+    },
+    {
+      "epoch": 0.0689,
+      "grad_norm": 1.158851574547141,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 6890
+    },
+    {
+      "epoch": 0.06891,
+      "grad_norm": 0.8775458730749034,
+      "learning_rate": 0.003,
+      "loss": 4.0639,
+      "step": 6891
+    },
+    {
+      "epoch": 0.06892,
+      "grad_norm": 1.0821142493586515,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 6892
+    },
+    {
+      "epoch": 0.06893,
+      "grad_norm": 1.0013455149294792,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 6893
+    },
+    {
+      "epoch": 0.06894,
+      "grad_norm": 1.1973645911348245,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 6894
+    },
+    {
+      "epoch": 0.06895,
+      "grad_norm": 0.9524668934091803,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 6895
+    },
+    {
+      "epoch": 0.06896,
+      "grad_norm": 1.042772383295928,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 6896
+    },
+    {
+      "epoch": 0.06897,
+      "grad_norm": 1.0973998064718429,
+      "learning_rate": 0.003,
+      "loss": 4.0829,
+      "step": 6897
+    },
+    {
+      "epoch": 0.06898,
+      "grad_norm": 0.941011645940574,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 6898
+    },
+    {
+      "epoch": 0.06899,
+      "grad_norm": 0.9259127376564472,
+      "learning_rate": 0.003,
+      "loss": 4.0801,
+      "step": 6899
+    },
+    {
+      "epoch": 0.069,
+      "grad_norm": 1.0042825475944963,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 6900
+    },
+    {
+      "epoch": 0.06901,
+      "grad_norm": 1.0504783164304958,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 6901
+    },
+    {
+      "epoch": 0.06902,
+      "grad_norm": 1.001395184859859,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 6902
+    },
+    {
+      "epoch": 0.06903,
+      "grad_norm": 0.9533095870741758,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 6903
+    },
+    {
+      "epoch": 0.06904,
+      "grad_norm": 0.8946382452071323,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 6904
+    },
+    {
+      "epoch": 0.06905,
+      "grad_norm": 0.9637064412868039,
+      "learning_rate": 0.003,
+      "loss": 4.0721,
+      "step": 6905
+    },
+    {
+      "epoch": 0.06906,
+      "grad_norm": 1.0077282899066335,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 6906
+    },
+    {
+      "epoch": 0.06907,
+      "grad_norm": 1.1560211480862905,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 6907
+    },
+    {
+      "epoch": 0.06908,
+      "grad_norm": 0.9093203754997685,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 6908
+    },
+    {
+      "epoch": 0.06909,
+      "grad_norm": 0.9106719670133724,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 6909
+    },
+    {
+      "epoch": 0.0691,
+      "grad_norm": 1.0352808190093583,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 6910
+    },
+    {
+      "epoch": 0.06911,
+      "grad_norm": 1.172505571950034,
+      "learning_rate": 0.003,
+      "loss": 4.0749,
+      "step": 6911
+    },
+    {
+      "epoch": 0.06912,
+      "grad_norm": 1.004717986434819,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 6912
+    },
+    {
+      "epoch": 0.06913,
+      "grad_norm": 1.0346811560718048,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 6913
+    },
+    {
+      "epoch": 0.06914,
+      "grad_norm": 0.9902540554926873,
+      "learning_rate": 0.003,
+      "loss": 4.0739,
+      "step": 6914
+    },
+    {
+      "epoch": 0.06915,
+      "grad_norm": 1.3265154292186019,
+      "learning_rate": 0.003,
+      "loss": 4.0875,
+      "step": 6915
+    },
+    {
+      "epoch": 0.06916,
+      "grad_norm": 1.0096731892843975,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 6916
+    },
+    {
+      "epoch": 0.06917,
+      "grad_norm": 1.1017730347115613,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 6917
+    },
+    {
+      "epoch": 0.06918,
+      "grad_norm": 1.2367019859366197,
+      "learning_rate": 0.003,
+      "loss": 4.0773,
+      "step": 6918
+    },
+    {
+      "epoch": 0.06919,
+      "grad_norm": 0.9219679674649628,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 6919
+    },
+    {
+      "epoch": 0.0692,
+      "grad_norm": 0.991754900343874,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 6920
+    },
+    {
+      "epoch": 0.06921,
+      "grad_norm": 1.2175829497274775,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 6921
+    },
+    {
+      "epoch": 0.06922,
+      "grad_norm": 0.9154029403754875,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 6922
+    },
+    {
+      "epoch": 0.06923,
+      "grad_norm": 1.0695987825330893,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 6923
+    },
+    {
+      "epoch": 0.06924,
+      "grad_norm": 0.940173575072378,
+      "learning_rate": 0.003,
+      "loss": 4.0814,
+      "step": 6924
+    },
+    {
+      "epoch": 0.06925,
+      "grad_norm": 1.1072649608493197,
+      "learning_rate": 0.003,
+      "loss": 4.0783,
+      "step": 6925
+    },
+    {
+      "epoch": 0.06926,
+      "grad_norm": 1.0433608354839305,
+      "learning_rate": 0.003,
+      "loss": 4.0736,
+      "step": 6926
+    },
+    {
+      "epoch": 0.06927,
+      "grad_norm": 1.0081972527274148,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 6927
+    },
+    {
+      "epoch": 0.06928,
+      "grad_norm": 1.1110960603111513,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 6928
+    },
+    {
+      "epoch": 0.06929,
+      "grad_norm": 1.020106821680874,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 6929
+    },
+    {
+      "epoch": 0.0693,
+      "grad_norm": 1.1534854540437196,
+      "learning_rate": 0.003,
+      "loss": 4.1001,
+      "step": 6930
+    },
+    {
+      "epoch": 0.06931,
+      "grad_norm": 0.9801283370812117,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 6931
+    },
+    {
+      "epoch": 0.06932,
+      "grad_norm": 1.0759745476144513,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 6932
+    },
+    {
+      "epoch": 0.06933,
+      "grad_norm": 1.1318320501940358,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 6933
+    },
+    {
+      "epoch": 0.06934,
+      "grad_norm": 0.9953954741024389,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 6934
+    },
+    {
+      "epoch": 0.06935,
+      "grad_norm": 1.0404905470053114,
+      "learning_rate": 0.003,
+      "loss": 4.0649,
+      "step": 6935
+    },
+    {
+      "epoch": 0.06936,
+      "grad_norm": 1.0468597561286765,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 6936
+    },
+    {
+      "epoch": 0.06937,
+      "grad_norm": 1.0201183676680066,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 6937
+    },
+    {
+      "epoch": 0.06938,
+      "grad_norm": 1.0787460833199358,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 6938
+    },
+    {
+      "epoch": 0.06939,
+      "grad_norm": 1.0901798974191894,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 6939
+    },
+    {
+      "epoch": 0.0694,
+      "grad_norm": 0.9385122020121525,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 6940
+    },
+    {
+      "epoch": 0.06941,
+      "grad_norm": 1.09751042724724,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 6941
+    },
+    {
+      "epoch": 0.06942,
+      "grad_norm": 1.0152502004249075,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 6942
+    },
+    {
+      "epoch": 0.06943,
+      "grad_norm": 1.1106778728034443,
+      "learning_rate": 0.003,
+      "loss": 4.0735,
+      "step": 6943
+    },
+    {
+      "epoch": 0.06944,
+      "grad_norm": 0.9402245095674768,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 6944
+    },
+    {
+      "epoch": 0.06945,
+      "grad_norm": 1.0498365010646888,
+      "learning_rate": 0.003,
+      "loss": 4.0794,
+      "step": 6945
+    },
+    {
+      "epoch": 0.06946,
+      "grad_norm": 1.04938260769981,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 6946
+    },
+    {
+      "epoch": 0.06947,
+      "grad_norm": 1.2040313407275116,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 6947
+    },
+    {
+      "epoch": 0.06948,
+      "grad_norm": 0.9174434519705801,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 6948
+    },
+    {
+      "epoch": 0.06949,
+      "grad_norm": 1.056561705670037,
+      "learning_rate": 0.003,
+      "loss": 4.0642,
+      "step": 6949
+    },
+    {
+      "epoch": 0.0695,
+      "grad_norm": 1.1160879395367673,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 6950
+    },
+    {
+      "epoch": 0.06951,
+      "grad_norm": 0.9311634834480271,
+      "learning_rate": 0.003,
+      "loss": 4.0707,
+      "step": 6951
+    },
+    {
+      "epoch": 0.06952,
+      "grad_norm": 1.0397971869811435,
+      "learning_rate": 0.003,
+      "loss": 4.0764,
+      "step": 6952
+    },
+    {
+      "epoch": 0.06953,
+      "grad_norm": 1.1245034234997564,
+      "learning_rate": 0.003,
+      "loss": 4.0744,
+      "step": 6953
+    },
+    {
+      "epoch": 0.06954,
+      "grad_norm": 1.0386924889956406,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 6954
+    },
+    {
+      "epoch": 0.06955,
+      "grad_norm": 0.9732927657279983,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 6955
+    },
+    {
+      "epoch": 0.06956,
+      "grad_norm": 0.9919815526911104,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 6956
+    },
+    {
+      "epoch": 0.06957,
+      "grad_norm": 0.9511426911729295,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 6957
+    },
+    {
+      "epoch": 0.06958,
+      "grad_norm": 0.936900952232894,
+      "learning_rate": 0.003,
+      "loss": 4.0634,
+      "step": 6958
+    },
+    {
+      "epoch": 0.06959,
+      "grad_norm": 0.9506093199052728,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 6959
+    },
+    {
+      "epoch": 0.0696,
+      "grad_norm": 0.894303598657002,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 6960
+    },
+    {
+      "epoch": 0.06961,
+      "grad_norm": 0.860669748877687,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 6961
+    },
+    {
+      "epoch": 0.06962,
+      "grad_norm": 0.9634371666233146,
+      "learning_rate": 0.003,
+      "loss": 4.0588,
+      "step": 6962
+    },
+    {
+      "epoch": 0.06963,
+      "grad_norm": 1.0960373688281182,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 6963
+    },
+    {
+      "epoch": 0.06964,
+      "grad_norm": 1.1321187322512247,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 6964
+    },
+    {
+      "epoch": 0.06965,
+      "grad_norm": 1.0060414603670687,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 6965
+    },
+    {
+      "epoch": 0.06966,
+      "grad_norm": 1.0380239906833486,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 6966
+    },
+    {
+      "epoch": 0.06967,
+      "grad_norm": 1.1084883931180407,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 6967
+    },
+    {
+      "epoch": 0.06968,
+      "grad_norm": 0.9467387430672358,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 6968
+    },
+    {
+      "epoch": 0.06969,
+      "grad_norm": 0.9622808523217883,
+      "learning_rate": 0.003,
+      "loss": 4.0689,
+      "step": 6969
+    },
+    {
+      "epoch": 0.0697,
+      "grad_norm": 1.2314020219415485,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 6970
+    },
+    {
+      "epoch": 0.06971,
+      "grad_norm": 1.1260021071930193,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 6971
+    },
+    {
+      "epoch": 0.06972,
+      "grad_norm": 0.9154963484107886,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 6972
+    },
+    {
+      "epoch": 0.06973,
+      "grad_norm": 0.8840670259700667,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 6973
+    },
+    {
+      "epoch": 0.06974,
+      "grad_norm": 0.8851178357236568,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 6974
+    },
+    {
+      "epoch": 0.06975,
+      "grad_norm": 0.9524411300067905,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 6975
+    },
+    {
+      "epoch": 0.06976,
+      "grad_norm": 1.088636962287439,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 6976
+    },
+    {
+      "epoch": 0.06977,
+      "grad_norm": 1.1071976636641974,
+      "learning_rate": 0.003,
+      "loss": 4.0944,
+      "step": 6977
+    },
+    {
+      "epoch": 0.06978,
+      "grad_norm": 0.9798522761711383,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 6978
+    },
+    {
+      "epoch": 0.06979,
+      "grad_norm": 0.807513012542661,
+      "learning_rate": 0.003,
+      "loss": 4.0675,
+      "step": 6979
+    },
+    {
+      "epoch": 0.0698,
+      "grad_norm": 1.0085245208426825,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 6980
+    },
+    {
+      "epoch": 0.06981,
+      "grad_norm": 1.0861815134074257,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 6981
+    },
+    {
+      "epoch": 0.06982,
+      "grad_norm": 0.7980450645757602,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 6982
+    },
+    {
+      "epoch": 0.06983,
+      "grad_norm": 0.9321566297048406,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 6983
+    },
+    {
+      "epoch": 0.06984,
+      "grad_norm": 1.0721971827592451,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 6984
+    },
+    {
+      "epoch": 0.06985,
+      "grad_norm": 1.0237038863583252,
+      "learning_rate": 0.003,
+      "loss": 4.0676,
+      "step": 6985
+    },
+    {
+      "epoch": 0.06986,
+      "grad_norm": 1.0546187122638413,
+      "learning_rate": 0.003,
+      "loss": 4.0611,
+      "step": 6986
+    },
+    {
+      "epoch": 0.06987,
+      "grad_norm": 1.1037883958736543,
+      "learning_rate": 0.003,
+      "loss": 4.0658,
+      "step": 6987
+    },
+    {
+      "epoch": 0.06988,
+      "grad_norm": 1.0507061025832318,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 6988
+    },
+    {
+      "epoch": 0.06989,
+      "grad_norm": 1.0236366890111934,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 6989
+    },
+    {
+      "epoch": 0.0699,
+      "grad_norm": 1.133782459769843,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 6990
+    },
+    {
+      "epoch": 0.06991,
+      "grad_norm": 0.9260651932828348,
+      "learning_rate": 0.003,
+      "loss": 4.0627,
+      "step": 6991
+    },
+    {
+      "epoch": 0.06992,
+      "grad_norm": 0.9925580541567944,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 6992
+    },
+    {
+      "epoch": 0.06993,
+      "grad_norm": 1.067990124386425,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 6993
+    },
+    {
+      "epoch": 0.06994,
+      "grad_norm": 0.9360919285448381,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 6994
+    },
+    {
+      "epoch": 0.06995,
+      "grad_norm": 0.96458377970943,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 6995
+    },
+    {
+      "epoch": 0.06996,
+      "grad_norm": 1.148084887236895,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 6996
+    },
+    {
+      "epoch": 0.06997,
+      "grad_norm": 0.9046547601765281,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 6997
+    },
+    {
+      "epoch": 0.06998,
+      "grad_norm": 0.8772615969985239,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 6998
+    },
+    {
+      "epoch": 0.06999,
+      "grad_norm": 0.9340057257128744,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 6999
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.0565666461050007,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 7000
+    },
+    {
+      "epoch": 0.07001,
+      "grad_norm": 0.9426298349737786,
+      "learning_rate": 0.003,
+      "loss": 4.0934,
+      "step": 7001
+    },
+    {
+      "epoch": 0.07002,
+      "grad_norm": 1.0664130531690763,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 7002
+    },
+    {
+      "epoch": 0.07003,
+      "grad_norm": 1.1291863119100505,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 7003
+    },
+    {
+      "epoch": 0.07004,
+      "grad_norm": 1.0642470026156752,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 7004
+    },
+    {
+      "epoch": 0.07005,
+      "grad_norm": 1.2390880199207346,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 7005
+    },
+    {
+      "epoch": 0.07006,
+      "grad_norm": 0.9062982273502466,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 7006
+    },
+    {
+      "epoch": 0.07007,
+      "grad_norm": 1.1259354802693449,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 7007
+    },
+    {
+      "epoch": 0.07008,
+      "grad_norm": 1.0839899240911712,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 7008
+    },
+    {
+      "epoch": 0.07009,
+      "grad_norm": 1.068714363123779,
+      "learning_rate": 0.003,
+      "loss": 4.0649,
+      "step": 7009
+    },
+    {
+      "epoch": 0.0701,
+      "grad_norm": 0.9452433028423766,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 7010
+    },
+    {
+      "epoch": 0.07011,
+      "grad_norm": 0.9919155661649216,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 7011
+    },
+    {
+      "epoch": 0.07012,
+      "grad_norm": 1.2609870971082537,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 7012
+    },
+    {
+      "epoch": 0.07013,
+      "grad_norm": 0.9993261593044893,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 7013
+    },
+    {
+      "epoch": 0.07014,
+      "grad_norm": 1.0685461199627173,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 7014
+    },
+    {
+      "epoch": 0.07015,
+      "grad_norm": 0.8992285999432262,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 7015
+    },
+    {
+      "epoch": 0.07016,
+      "grad_norm": 0.8983621405017727,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 7016
+    },
+    {
+      "epoch": 0.07017,
+      "grad_norm": 0.9901752576746331,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 7017
+    },
+    {
+      "epoch": 0.07018,
+      "grad_norm": 1.1074224986234265,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 7018
+    },
+    {
+      "epoch": 0.07019,
+      "grad_norm": 1.057295544596109,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 7019
+    },
+    {
+      "epoch": 0.0702,
+      "grad_norm": 1.0481341550110743,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 7020
+    },
+    {
+      "epoch": 0.07021,
+      "grad_norm": 1.0407732639272513,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 7021
+    },
+    {
+      "epoch": 0.07022,
+      "grad_norm": 1.1448172616705823,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 7022
+    },
+    {
+      "epoch": 0.07023,
+      "grad_norm": 1.0313350981765694,
+      "learning_rate": 0.003,
+      "loss": 4.0582,
+      "step": 7023
+    },
+    {
+      "epoch": 0.07024,
+      "grad_norm": 1.0020252731504273,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 7024
+    },
+    {
+      "epoch": 0.07025,
+      "grad_norm": 1.1359222787122534,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 7025
+    },
+    {
+      "epoch": 0.07026,
+      "grad_norm": 1.1041895748549755,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 7026
+    },
+    {
+      "epoch": 0.07027,
+      "grad_norm": 0.9841999651428623,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 7027
+    },
+    {
+      "epoch": 0.07028,
+      "grad_norm": 1.025891345196206,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 7028
+    },
+    {
+      "epoch": 0.07029,
+      "grad_norm": 0.9948854364695006,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 7029
+    },
+    {
+      "epoch": 0.0703,
+      "grad_norm": 0.9668935895325885,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 7030
+    },
+    {
+      "epoch": 0.07031,
+      "grad_norm": 1.0575663087737428,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 7031
+    },
+    {
+      "epoch": 0.07032,
+      "grad_norm": 0.9164666976168099,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 7032
+    },
+    {
+      "epoch": 0.07033,
+      "grad_norm": 0.9214454265285555,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 7033
+    },
+    {
+      "epoch": 0.07034,
+      "grad_norm": 0.9583267727565041,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 7034
+    },
+    {
+      "epoch": 0.07035,
+      "grad_norm": 1.3383936324024557,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 7035
+    },
+    {
+      "epoch": 0.07036,
+      "grad_norm": 0.8658946616429294,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 7036
+    },
+    {
+      "epoch": 0.07037,
+      "grad_norm": 0.9475554646881245,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 7037
+    },
+    {
+      "epoch": 0.07038,
+      "grad_norm": 0.8924840709336554,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 7038
+    },
+    {
+      "epoch": 0.07039,
+      "grad_norm": 0.8951723916341889,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 7039
+    },
+    {
+      "epoch": 0.0704,
+      "grad_norm": 0.854416819628114,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 7040
+    },
+    {
+      "epoch": 0.07041,
+      "grad_norm": 0.9333352868640866,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 7041
+    },
+    {
+      "epoch": 0.07042,
+      "grad_norm": 1.094975909783682,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 7042
+    },
+    {
+      "epoch": 0.07043,
+      "grad_norm": 1.111332862841996,
+      "learning_rate": 0.003,
+      "loss": 4.0733,
+      "step": 7043
+    },
+    {
+      "epoch": 0.07044,
+      "grad_norm": 1.0657438894896911,
+      "learning_rate": 0.003,
+      "loss": 4.0692,
+      "step": 7044
+    },
+    {
+      "epoch": 0.07045,
+      "grad_norm": 1.214943958168645,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 7045
+    },
+    {
+      "epoch": 0.07046,
+      "grad_norm": 1.0052618021881827,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 7046
+    },
+    {
+      "epoch": 0.07047,
+      "grad_norm": 1.2069549174110699,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 7047
+    },
+    {
+      "epoch": 0.07048,
+      "grad_norm": 0.7843964117630299,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 7048
+    },
+    {
+      "epoch": 0.07049,
+      "grad_norm": 0.691374439878814,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 7049
+    },
+    {
+      "epoch": 0.0705,
+      "grad_norm": 0.7306826895178101,
+      "learning_rate": 0.003,
+      "loss": 4.0606,
+      "step": 7050
+    },
+    {
+      "epoch": 0.07051,
+      "grad_norm": 0.8933100091919502,
+      "learning_rate": 0.003,
+      "loss": 4.0689,
+      "step": 7051
+    },
+    {
+      "epoch": 0.07052,
+      "grad_norm": 1.0871930655736466,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 7052
+    },
+    {
+      "epoch": 0.07053,
+      "grad_norm": 1.306112767575483,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 7053
+    },
+    {
+      "epoch": 0.07054,
+      "grad_norm": 0.7906641478755512,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 7054
+    },
+    {
+      "epoch": 0.07055,
+      "grad_norm": 0.8522746819689732,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 7055
+    },
+    {
+      "epoch": 0.07056,
+      "grad_norm": 0.9004086354520517,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 7056
+    },
+    {
+      "epoch": 0.07057,
+      "grad_norm": 0.9624496801222439,
+      "learning_rate": 0.003,
+      "loss": 4.0564,
+      "step": 7057
+    },
+    {
+      "epoch": 0.07058,
+      "grad_norm": 0.8808033290776586,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 7058
+    },
+    {
+      "epoch": 0.07059,
+      "grad_norm": 1.0148842321503468,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 7059
+    },
+    {
+      "epoch": 0.0706,
+      "grad_norm": 1.0661395469335966,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 7060
+    },
+    {
+      "epoch": 0.07061,
+      "grad_norm": 1.033267111562011,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 7061
+    },
+    {
+      "epoch": 0.07062,
+      "grad_norm": 1.1023501283703616,
+      "learning_rate": 0.003,
+      "loss": 4.0808,
+      "step": 7062
+    },
+    {
+      "epoch": 0.07063,
+      "grad_norm": 1.1371048285194043,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 7063
+    },
+    {
+      "epoch": 0.07064,
+      "grad_norm": 1.0600698489467515,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 7064
+    },
+    {
+      "epoch": 0.07065,
+      "grad_norm": 1.0845674242904877,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 7065
+    },
+    {
+      "epoch": 0.07066,
+      "grad_norm": 1.0117517612735492,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 7066
+    },
+    {
+      "epoch": 0.07067,
+      "grad_norm": 1.1834369979286008,
+      "learning_rate": 0.003,
+      "loss": 4.1004,
+      "step": 7067
+    },
+    {
+      "epoch": 0.07068,
+      "grad_norm": 0.9912589707934546,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 7068
+    },
+    {
+      "epoch": 0.07069,
+      "grad_norm": 1.008241016661584,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 7069
+    },
+    {
+      "epoch": 0.0707,
+      "grad_norm": 1.1771304332271513,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 7070
+    },
+    {
+      "epoch": 0.07071,
+      "grad_norm": 1.1143211837122156,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 7071
+    },
+    {
+      "epoch": 0.07072,
+      "grad_norm": 0.9342779082515223,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 7072
+    },
+    {
+      "epoch": 0.07073,
+      "grad_norm": 0.8837711443464364,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 7073
+    },
+    {
+      "epoch": 0.07074,
+      "grad_norm": 1.0273879438054683,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 7074
+    },
+    {
+      "epoch": 0.07075,
+      "grad_norm": 1.1016591310376989,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 7075
+    },
+    {
+      "epoch": 0.07076,
+      "grad_norm": 1.105334861387564,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 7076
+    },
+    {
+      "epoch": 0.07077,
+      "grad_norm": 0.9146056746290705,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 7077
+    },
+    {
+      "epoch": 0.07078,
+      "grad_norm": 0.8766079673528181,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 7078
+    },
+    {
+      "epoch": 0.07079,
+      "grad_norm": 0.9316844058335465,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 7079
+    },
+    {
+      "epoch": 0.0708,
+      "grad_norm": 1.0866064222511407,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 7080
+    },
+    {
+      "epoch": 0.07081,
+      "grad_norm": 0.9789936615170471,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 7081
+    },
+    {
+      "epoch": 0.07082,
+      "grad_norm": 1.1021827553494439,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 7082
+    },
+    {
+      "epoch": 0.07083,
+      "grad_norm": 1.0245533209037434,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 7083
+    },
+    {
+      "epoch": 0.07084,
+      "grad_norm": 1.0621477148568337,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 7084
+    },
+    {
+      "epoch": 0.07085,
+      "grad_norm": 1.1601015540652249,
+      "learning_rate": 0.003,
+      "loss": 4.0714,
+      "step": 7085
+    },
+    {
+      "epoch": 0.07086,
+      "grad_norm": 1.0854553734016084,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 7086
+    },
+    {
+      "epoch": 0.07087,
+      "grad_norm": 1.3922698043387116,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 7087
+    },
+    {
+      "epoch": 0.07088,
+      "grad_norm": 0.9399003500801127,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 7088
+    },
+    {
+      "epoch": 0.07089,
+      "grad_norm": 1.106468816582972,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 7089
+    },
+    {
+      "epoch": 0.0709,
+      "grad_norm": 1.1027532636474677,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 7090
+    },
+    {
+      "epoch": 0.07091,
+      "grad_norm": 1.0374251515271373,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 7091
+    },
+    {
+      "epoch": 0.07092,
+      "grad_norm": 1.1299226085447214,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 7092
+    },
+    {
+      "epoch": 0.07093,
+      "grad_norm": 0.9253623127929507,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 7093
+    },
+    {
+      "epoch": 0.07094,
+      "grad_norm": 1.1587332040574374,
+      "learning_rate": 0.003,
+      "loss": 4.0891,
+      "step": 7094
+    },
+    {
+      "epoch": 0.07095,
+      "grad_norm": 0.7769235402798084,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 7095
+    },
+    {
+      "epoch": 0.07096,
+      "grad_norm": 0.8053305671402123,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 7096
+    },
+    {
+      "epoch": 0.07097,
+      "grad_norm": 0.9209272441259044,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 7097
+    },
+    {
+      "epoch": 0.07098,
+      "grad_norm": 1.0625889845082845,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 7098
+    },
+    {
+      "epoch": 0.07099,
+      "grad_norm": 1.0498047751329527,
+      "learning_rate": 0.003,
+      "loss": 4.0621,
+      "step": 7099
+    },
+    {
+      "epoch": 0.071,
+      "grad_norm": 1.224649756474013,
+      "learning_rate": 0.003,
+      "loss": 4.0633,
+      "step": 7100
+    },
+    {
+      "epoch": 0.07101,
+      "grad_norm": 1.008032355385532,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 7101
+    },
+    {
+      "epoch": 0.07102,
+      "grad_norm": 1.0752805417902511,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 7102
+    },
+    {
+      "epoch": 0.07103,
+      "grad_norm": 1.0092223551265274,
+      "learning_rate": 0.003,
+      "loss": 4.0679,
+      "step": 7103
+    },
+    {
+      "epoch": 0.07104,
+      "grad_norm": 1.072487221815657,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 7104
+    },
+    {
+      "epoch": 0.07105,
+      "grad_norm": 1.1260402490166252,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 7105
+    },
+    {
+      "epoch": 0.07106,
+      "grad_norm": 0.9259910792008968,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 7106
+    },
+    {
+      "epoch": 0.07107,
+      "grad_norm": 0.9680226789669217,
+      "learning_rate": 0.003,
+      "loss": 4.0597,
+      "step": 7107
+    },
+    {
+      "epoch": 0.07108,
+      "grad_norm": 0.935158998841827,
+      "learning_rate": 0.003,
+      "loss": 4.0664,
+      "step": 7108
+    },
+    {
+      "epoch": 0.07109,
+      "grad_norm": 1.0085487694764077,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 7109
+    },
+    {
+      "epoch": 0.0711,
+      "grad_norm": 1.196671503733759,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 7110
+    },
+    {
+      "epoch": 0.07111,
+      "grad_norm": 0.9584229952834501,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 7111
+    },
+    {
+      "epoch": 0.07112,
+      "grad_norm": 0.9641919680290156,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 7112
+    },
+    {
+      "epoch": 0.07113,
+      "grad_norm": 0.9993706432398796,
+      "learning_rate": 0.003,
+      "loss": 4.0858,
+      "step": 7113
+    },
+    {
+      "epoch": 0.07114,
+      "grad_norm": 1.1499489634058697,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 7114
+    },
+    {
+      "epoch": 0.07115,
+      "grad_norm": 1.194464795275503,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 7115
+    },
+    {
+      "epoch": 0.07116,
+      "grad_norm": 1.21121303150807,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 7116
+    },
+    {
+      "epoch": 0.07117,
+      "grad_norm": 1.0049044123637882,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 7117
+    },
+    {
+      "epoch": 0.07118,
+      "grad_norm": 1.2298880131028436,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 7118
+    },
+    {
+      "epoch": 0.07119,
+      "grad_norm": 0.8611868402134719,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 7119
+    },
+    {
+      "epoch": 0.0712,
+      "grad_norm": 0.7702510545372283,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 7120
+    },
+    {
+      "epoch": 0.07121,
+      "grad_norm": 0.8954788767538316,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 7121
+    },
+    {
+      "epoch": 0.07122,
+      "grad_norm": 1.0842710437040461,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 7122
+    },
+    {
+      "epoch": 0.07123,
+      "grad_norm": 0.9923425116430409,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 7123
+    },
+    {
+      "epoch": 0.07124,
+      "grad_norm": 1.1813157355705963,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 7124
+    },
+    {
+      "epoch": 0.07125,
+      "grad_norm": 1.2554701276201077,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 7125
+    },
+    {
+      "epoch": 0.07126,
+      "grad_norm": 0.9773001931060757,
+      "learning_rate": 0.003,
+      "loss": 4.074,
+      "step": 7126
+    },
+    {
+      "epoch": 0.07127,
+      "grad_norm": 1.0712269928450333,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 7127
+    },
+    {
+      "epoch": 0.07128,
+      "grad_norm": 1.0739153312552485,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 7128
+    },
+    {
+      "epoch": 0.07129,
+      "grad_norm": 1.041760051339172,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 7129
+    },
+    {
+      "epoch": 0.0713,
+      "grad_norm": 1.079087703256973,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 7130
+    },
+    {
+      "epoch": 0.07131,
+      "grad_norm": 0.9759469792704608,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 7131
+    },
+    {
+      "epoch": 0.07132,
+      "grad_norm": 1.048658582228863,
+      "learning_rate": 0.003,
+      "loss": 4.0798,
+      "step": 7132
+    },
+    {
+      "epoch": 0.07133,
+      "grad_norm": 1.0618378394098185,
+      "learning_rate": 0.003,
+      "loss": 4.0869,
+      "step": 7133
+    },
+    {
+      "epoch": 0.07134,
+      "grad_norm": 1.1513266730887497,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 7134
+    },
+    {
+      "epoch": 0.07135,
+      "grad_norm": 0.9259298591427148,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 7135
+    },
+    {
+      "epoch": 0.07136,
+      "grad_norm": 1.0755737256450422,
+      "learning_rate": 0.003,
+      "loss": 4.0939,
+      "step": 7136
+    },
+    {
+      "epoch": 0.07137,
+      "grad_norm": 1.177551226290782,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 7137
+    },
+    {
+      "epoch": 0.07138,
+      "grad_norm": 1.22091533451439,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 7138
+    },
+    {
+      "epoch": 0.07139,
+      "grad_norm": 0.7759575681946775,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 7139
+    },
+    {
+      "epoch": 0.0714,
+      "grad_norm": 0.7408826660171673,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 7140
+    },
+    {
+      "epoch": 0.07141,
+      "grad_norm": 0.7146551856791451,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 7141
+    },
+    {
+      "epoch": 0.07142,
+      "grad_norm": 0.9506570889697072,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 7142
+    },
+    {
+      "epoch": 0.07143,
+      "grad_norm": 1.30251677781438,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 7143
+    },
+    {
+      "epoch": 0.07144,
+      "grad_norm": 0.8991561050670873,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 7144
+    },
+    {
+      "epoch": 0.07145,
+      "grad_norm": 1.0074973465006982,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 7145
+    },
+    {
+      "epoch": 0.07146,
+      "grad_norm": 1.0771529601001637,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 7146
+    },
+    {
+      "epoch": 0.07147,
+      "grad_norm": 0.8612872764436263,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 7147
+    },
+    {
+      "epoch": 0.07148,
+      "grad_norm": 0.9341113795199162,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 7148
+    },
+    {
+      "epoch": 0.07149,
+      "grad_norm": 1.0062478097452772,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 7149
+    },
+    {
+      "epoch": 0.0715,
+      "grad_norm": 1.0268598684975616,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 7150
+    },
+    {
+      "epoch": 0.07151,
+      "grad_norm": 1.0224439130582545,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 7151
+    },
+    {
+      "epoch": 0.07152,
+      "grad_norm": 1.0646975295350594,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 7152
+    },
+    {
+      "epoch": 0.07153,
+      "grad_norm": 1.0171664033211174,
+      "learning_rate": 0.003,
+      "loss": 4.0566,
+      "step": 7153
+    },
+    {
+      "epoch": 0.07154,
+      "grad_norm": 1.0400329068005092,
+      "learning_rate": 0.003,
+      "loss": 4.0911,
+      "step": 7154
+    },
+    {
+      "epoch": 0.07155,
+      "grad_norm": 0.9604129137098717,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 7155
+    },
+    {
+      "epoch": 0.07156,
+      "grad_norm": 1.0703502921341488,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 7156
+    },
+    {
+      "epoch": 0.07157,
+      "grad_norm": 1.2643947181831285,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 7157
+    },
+    {
+      "epoch": 0.07158,
+      "grad_norm": 0.8820140566698819,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 7158
+    },
+    {
+      "epoch": 0.07159,
+      "grad_norm": 0.9999756514750661,
+      "learning_rate": 0.003,
+      "loss": 4.0579,
+      "step": 7159
+    },
+    {
+      "epoch": 0.0716,
+      "grad_norm": 1.2329285675483432,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 7160
+    },
+    {
+      "epoch": 0.07161,
+      "grad_norm": 0.9648860180858708,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 7161
+    },
+    {
+      "epoch": 0.07162,
+      "grad_norm": 1.1598981829481805,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 7162
+    },
+    {
+      "epoch": 0.07163,
+      "grad_norm": 1.0352892191066683,
+      "learning_rate": 0.003,
+      "loss": 4.0566,
+      "step": 7163
+    },
+    {
+      "epoch": 0.07164,
+      "grad_norm": 1.1005133026385345,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 7164
+    },
+    {
+      "epoch": 0.07165,
+      "grad_norm": 1.081455462126825,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 7165
+    },
+    {
+      "epoch": 0.07166,
+      "grad_norm": 1.2353060960241047,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 7166
+    },
+    {
+      "epoch": 0.07167,
+      "grad_norm": 0.9483072899067242,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 7167
+    },
+    {
+      "epoch": 0.07168,
+      "grad_norm": 0.9415550285281931,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 7168
+    },
+    {
+      "epoch": 0.07169,
+      "grad_norm": 1.0091462587193163,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 7169
+    },
+    {
+      "epoch": 0.0717,
+      "grad_norm": 1.0220680948634882,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 7170
+    },
+    {
+      "epoch": 0.07171,
+      "grad_norm": 0.9757190374038757,
+      "learning_rate": 0.003,
+      "loss": 4.0617,
+      "step": 7171
+    },
+    {
+      "epoch": 0.07172,
+      "grad_norm": 1.1004971741989973,
+      "learning_rate": 0.003,
+      "loss": 4.0679,
+      "step": 7172
+    },
+    {
+      "epoch": 0.07173,
+      "grad_norm": 0.9543571710406531,
+      "learning_rate": 0.003,
+      "loss": 4.0665,
+      "step": 7173
+    },
+    {
+      "epoch": 0.07174,
+      "grad_norm": 1.087698472674678,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 7174
+    },
+    {
+      "epoch": 0.07175,
+      "grad_norm": 1.1109156909053697,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 7175
+    },
+    {
+      "epoch": 0.07176,
+      "grad_norm": 0.9503187829288862,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 7176
+    },
+    {
+      "epoch": 0.07177,
+      "grad_norm": 0.9936594901006058,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 7177
+    },
+    {
+      "epoch": 0.07178,
+      "grad_norm": 1.061316451230859,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 7178
+    },
+    {
+      "epoch": 0.07179,
+      "grad_norm": 0.9212825081885807,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 7179
+    },
+    {
+      "epoch": 0.0718,
+      "grad_norm": 1.0771371633803515,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 7180
+    },
+    {
+      "epoch": 0.07181,
+      "grad_norm": 1.0622270106395655,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 7181
+    },
+    {
+      "epoch": 0.07182,
+      "grad_norm": 1.2387354855038064,
+      "learning_rate": 0.003,
+      "loss": 4.0676,
+      "step": 7182
+    },
+    {
+      "epoch": 0.07183,
+      "grad_norm": 0.9639603006489462,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 7183
+    },
+    {
+      "epoch": 0.07184,
+      "grad_norm": 1.0093619599090504,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 7184
+    },
+    {
+      "epoch": 0.07185,
+      "grad_norm": 0.9714974637831857,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 7185
+    },
+    {
+      "epoch": 0.07186,
+      "grad_norm": 0.9153837367744138,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 7186
+    },
+    {
+      "epoch": 0.07187,
+      "grad_norm": 0.9316846582768762,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 7187
+    },
+    {
+      "epoch": 0.07188,
+      "grad_norm": 1.0594153818209886,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 7188
+    },
+    {
+      "epoch": 0.07189,
+      "grad_norm": 0.918217273642505,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 7189
+    },
+    {
+      "epoch": 0.0719,
+      "grad_norm": 1.0085899488533316,
+      "learning_rate": 0.003,
+      "loss": 4.0624,
+      "step": 7190
+    },
+    {
+      "epoch": 0.07191,
+      "grad_norm": 1.1236120434513654,
+      "learning_rate": 0.003,
+      "loss": 4.0748,
+      "step": 7191
+    },
+    {
+      "epoch": 0.07192,
+      "grad_norm": 1.0142883651839383,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 7192
+    },
+    {
+      "epoch": 0.07193,
+      "grad_norm": 0.976978007096748,
+      "learning_rate": 0.003,
+      "loss": 4.0611,
+      "step": 7193
+    },
+    {
+      "epoch": 0.07194,
+      "grad_norm": 1.1423821853316132,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 7194
+    },
+    {
+      "epoch": 0.07195,
+      "grad_norm": 1.105242324637109,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 7195
+    },
+    {
+      "epoch": 0.07196,
+      "grad_norm": 1.050425013065668,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 7196
+    },
+    {
+      "epoch": 0.07197,
+      "grad_norm": 1.1236152449610997,
+      "learning_rate": 0.003,
+      "loss": 4.0753,
+      "step": 7197
+    },
+    {
+      "epoch": 0.07198,
+      "grad_norm": 1.0814707926776106,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 7198
+    },
+    {
+      "epoch": 0.07199,
+      "grad_norm": 1.1497210191318876,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 7199
+    },
+    {
+      "epoch": 0.072,
+      "grad_norm": 0.9814431913033573,
+      "learning_rate": 0.003,
+      "loss": 4.0747,
+      "step": 7200
+    },
+    {
+      "epoch": 0.07201,
+      "grad_norm": 0.9399249347861436,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 7201
+    },
+    {
+      "epoch": 0.07202,
+      "grad_norm": 1.0283475742305797,
+      "learning_rate": 0.003,
+      "loss": 4.0697,
+      "step": 7202
+    },
+    {
+      "epoch": 0.07203,
+      "grad_norm": 1.3436302183163868,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 7203
+    },
+    {
+      "epoch": 0.07204,
+      "grad_norm": 0.8118935525393225,
+      "learning_rate": 0.003,
+      "loss": 4.0741,
+      "step": 7204
+    },
+    {
+      "epoch": 0.07205,
+      "grad_norm": 0.6513445176109008,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 7205
+    },
+    {
+      "epoch": 0.07206,
+      "grad_norm": 0.7054255168362077,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 7206
+    },
+    {
+      "epoch": 0.07207,
+      "grad_norm": 0.754052659367641,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 7207
+    },
+    {
+      "epoch": 0.07208,
+      "grad_norm": 0.9086098698061754,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 7208
+    },
+    {
+      "epoch": 0.07209,
+      "grad_norm": 1.1574952413730935,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 7209
+    },
+    {
+      "epoch": 0.0721,
+      "grad_norm": 0.8639251076932414,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 7210
+    },
+    {
+      "epoch": 0.07211,
+      "grad_norm": 0.8049066976652748,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 7211
+    },
+    {
+      "epoch": 0.07212,
+      "grad_norm": 0.9012892110405708,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 7212
+    },
+    {
+      "epoch": 0.07213,
+      "grad_norm": 1.0195248366641139,
+      "learning_rate": 0.003,
+      "loss": 4.0706,
+      "step": 7213
+    },
+    {
+      "epoch": 0.07214,
+      "grad_norm": 1.0957107033402924,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 7214
+    },
+    {
+      "epoch": 0.07215,
+      "grad_norm": 1.130563317224989,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 7215
+    },
+    {
+      "epoch": 0.07216,
+      "grad_norm": 1.0776995902257729,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 7216
+    },
+    {
+      "epoch": 0.07217,
+      "grad_norm": 0.9762456922279278,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 7217
+    },
+    {
+      "epoch": 0.07218,
+      "grad_norm": 1.0572760232733118,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 7218
+    },
+    {
+      "epoch": 0.07219,
+      "grad_norm": 1.1200235428034362,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 7219
+    },
+    {
+      "epoch": 0.0722,
+      "grad_norm": 0.9279710532680243,
+      "learning_rate": 0.003,
+      "loss": 4.0582,
+      "step": 7220
+    },
+    {
+      "epoch": 0.07221,
+      "grad_norm": 1.0865227213549595,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 7221
+    },
+    {
+      "epoch": 0.07222,
+      "grad_norm": 1.1530440294312476,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 7222
+    },
+    {
+      "epoch": 0.07223,
+      "grad_norm": 1.105728877799717,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 7223
+    },
+    {
+      "epoch": 0.07224,
+      "grad_norm": 1.246082917171359,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 7224
+    },
+    {
+      "epoch": 0.07225,
+      "grad_norm": 0.8775383173500949,
+      "learning_rate": 0.003,
+      "loss": 4.0524,
+      "step": 7225
+    },
+    {
+      "epoch": 0.07226,
+      "grad_norm": 0.9055827431051069,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 7226
+    },
+    {
+      "epoch": 0.07227,
+      "grad_norm": 0.9729711259743795,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 7227
+    },
+    {
+      "epoch": 0.07228,
+      "grad_norm": 1.122946918012819,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 7228
+    },
+    {
+      "epoch": 0.07229,
+      "grad_norm": 1.145241716228926,
+      "learning_rate": 0.003,
+      "loss": 4.0529,
+      "step": 7229
+    },
+    {
+      "epoch": 0.0723,
+      "grad_norm": 1.1809619676313377,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 7230
+    },
+    {
+      "epoch": 0.07231,
+      "grad_norm": 1.0638511137158617,
+      "learning_rate": 0.003,
+      "loss": 4.063,
+      "step": 7231
+    },
+    {
+      "epoch": 0.07232,
+      "grad_norm": 1.3174222410179524,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 7232
+    },
+    {
+      "epoch": 0.07233,
+      "grad_norm": 0.7594174026538151,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 7233
+    },
+    {
+      "epoch": 0.07234,
+      "grad_norm": 0.7113003269832634,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 7234
+    },
+    {
+      "epoch": 0.07235,
+      "grad_norm": 0.7666612536000506,
+      "learning_rate": 0.003,
+      "loss": 4.0728,
+      "step": 7235
+    },
+    {
+      "epoch": 0.07236,
+      "grad_norm": 1.0228879699076707,
+      "learning_rate": 0.003,
+      "loss": 4.0739,
+      "step": 7236
+    },
+    {
+      "epoch": 0.07237,
+      "grad_norm": 1.0627669746296777,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 7237
+    },
+    {
+      "epoch": 0.07238,
+      "grad_norm": 0.9408632718140275,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 7238
+    },
+    {
+      "epoch": 0.07239,
+      "grad_norm": 1.07854762597468,
+      "learning_rate": 0.003,
+      "loss": 4.0625,
+      "step": 7239
+    },
+    {
+      "epoch": 0.0724,
+      "grad_norm": 1.1116574655009852,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 7240
+    },
+    {
+      "epoch": 0.07241,
+      "grad_norm": 1.0625991735895157,
+      "learning_rate": 0.003,
+      "loss": 4.0655,
+      "step": 7241
+    },
+    {
+      "epoch": 0.07242,
+      "grad_norm": 1.1130412168878459,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 7242
+    },
+    {
+      "epoch": 0.07243,
+      "grad_norm": 1.0523505008259475,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 7243
+    },
+    {
+      "epoch": 0.07244,
+      "grad_norm": 1.1814825129506323,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 7244
+    },
+    {
+      "epoch": 0.07245,
+      "grad_norm": 0.9545116614676915,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 7245
+    },
+    {
+      "epoch": 0.07246,
+      "grad_norm": 1.0800472059853927,
+      "learning_rate": 0.003,
+      "loss": 4.0575,
+      "step": 7246
+    },
+    {
+      "epoch": 0.07247,
+      "grad_norm": 0.8747307815336857,
+      "learning_rate": 0.003,
+      "loss": 4.0701,
+      "step": 7247
+    },
+    {
+      "epoch": 0.07248,
+      "grad_norm": 0.9457171551359618,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 7248
+    },
+    {
+      "epoch": 0.07249,
+      "grad_norm": 0.9746074762050819,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 7249
+    },
+    {
+      "epoch": 0.0725,
+      "grad_norm": 1.0830316386031944,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 7250
+    },
+    {
+      "epoch": 0.07251,
+      "grad_norm": 1.1295155297093291,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 7251
+    },
+    {
+      "epoch": 0.07252,
+      "grad_norm": 1.0574374191488838,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 7252
+    },
+    {
+      "epoch": 0.07253,
+      "grad_norm": 1.1371667240401926,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 7253
+    },
+    {
+      "epoch": 0.07254,
+      "grad_norm": 0.8575120967224161,
+      "learning_rate": 0.003,
+      "loss": 4.0656,
+      "step": 7254
+    },
+    {
+      "epoch": 0.07255,
+      "grad_norm": 1.2040896120213669,
+      "learning_rate": 0.003,
+      "loss": 4.0694,
+      "step": 7255
+    },
+    {
+      "epoch": 0.07256,
+      "grad_norm": 1.0282986308414142,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 7256
+    },
+    {
+      "epoch": 0.07257,
+      "grad_norm": 1.1867368710695958,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 7257
+    },
+    {
+      "epoch": 0.07258,
+      "grad_norm": 0.8751280922843936,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 7258
+    },
+    {
+      "epoch": 0.07259,
+      "grad_norm": 0.9468806915219505,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 7259
+    },
+    {
+      "epoch": 0.0726,
+      "grad_norm": 1.13462604181537,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 7260
+    },
+    {
+      "epoch": 0.07261,
+      "grad_norm": 0.9480119807528755,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 7261
+    },
+    {
+      "epoch": 0.07262,
+      "grad_norm": 1.0086785799416162,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 7262
+    },
+    {
+      "epoch": 0.07263,
+      "grad_norm": 0.9776635908283169,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 7263
+    },
+    {
+      "epoch": 0.07264,
+      "grad_norm": 0.9280201688532035,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 7264
+    },
+    {
+      "epoch": 0.07265,
+      "grad_norm": 1.146251437770367,
+      "learning_rate": 0.003,
+      "loss": 4.0549,
+      "step": 7265
+    },
+    {
+      "epoch": 0.07266,
+      "grad_norm": 0.9536640102408181,
+      "learning_rate": 0.003,
+      "loss": 4.0676,
+      "step": 7266
+    },
+    {
+      "epoch": 0.07267,
+      "grad_norm": 1.1810792017759155,
+      "learning_rate": 0.003,
+      "loss": 4.0606,
+      "step": 7267
+    },
+    {
+      "epoch": 0.07268,
+      "grad_norm": 1.0667635725102285,
+      "learning_rate": 0.003,
+      "loss": 4.0683,
+      "step": 7268
+    },
+    {
+      "epoch": 0.07269,
+      "grad_norm": 1.0933436942514816,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 7269
+    },
+    {
+      "epoch": 0.0727,
+      "grad_norm": 0.9075857622412058,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 7270
+    },
+    {
+      "epoch": 0.07271,
+      "grad_norm": 0.9549614482171724,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 7271
+    },
+    {
+      "epoch": 0.07272,
+      "grad_norm": 0.8614083119020385,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 7272
+    },
+    {
+      "epoch": 0.07273,
+      "grad_norm": 0.8876688164940039,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 7273
+    },
+    {
+      "epoch": 0.07274,
+      "grad_norm": 0.9248498248735642,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 7274
+    },
+    {
+      "epoch": 0.07275,
+      "grad_norm": 0.9301192282673498,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 7275
+    },
+    {
+      "epoch": 0.07276,
+      "grad_norm": 1.0236905978389461,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 7276
+    },
+    {
+      "epoch": 0.07277,
+      "grad_norm": 1.2555861212594497,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 7277
+    },
+    {
+      "epoch": 0.07278,
+      "grad_norm": 1.0168850674837941,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 7278
+    },
+    {
+      "epoch": 0.07279,
+      "grad_norm": 1.3248139439976,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 7279
+    },
+    {
+      "epoch": 0.0728,
+      "grad_norm": 0.9735763353453082,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 7280
+    },
+    {
+      "epoch": 0.07281,
+      "grad_norm": 1.1238701240024835,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 7281
+    },
+    {
+      "epoch": 0.07282,
+      "grad_norm": 1.075085316193029,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 7282
+    },
+    {
+      "epoch": 0.07283,
+      "grad_norm": 1.1149909879984707,
+      "learning_rate": 0.003,
+      "loss": 4.0766,
+      "step": 7283
+    },
+    {
+      "epoch": 0.07284,
+      "grad_norm": 1.060193425500397,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 7284
+    },
+    {
+      "epoch": 0.07285,
+      "grad_norm": 1.1145512915255615,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 7285
+    },
+    {
+      "epoch": 0.07286,
+      "grad_norm": 1.0821681081123302,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 7286
+    },
+    {
+      "epoch": 0.07287,
+      "grad_norm": 0.9482005317460671,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 7287
+    },
+    {
+      "epoch": 0.07288,
+      "grad_norm": 1.1770572136072466,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 7288
+    },
+    {
+      "epoch": 0.07289,
+      "grad_norm": 1.0392539942924648,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 7289
+    },
+    {
+      "epoch": 0.0729,
+      "grad_norm": 1.093723077978444,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 7290
+    },
+    {
+      "epoch": 0.07291,
+      "grad_norm": 1.2345580518798212,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 7291
+    },
+    {
+      "epoch": 0.07292,
+      "grad_norm": 1.066201654699835,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 7292
+    },
+    {
+      "epoch": 0.07293,
+      "grad_norm": 1.1510430611988984,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 7293
+    },
+    {
+      "epoch": 0.07294,
+      "grad_norm": 0.9783586851033524,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 7294
+    },
+    {
+      "epoch": 0.07295,
+      "grad_norm": 1.125641508383537,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 7295
+    },
+    {
+      "epoch": 0.07296,
+      "grad_norm": 0.8931193549630414,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 7296
+    },
+    {
+      "epoch": 0.07297,
+      "grad_norm": 0.9640861362127288,
+      "learning_rate": 0.003,
+      "loss": 4.0718,
+      "step": 7297
+    },
+    {
+      "epoch": 0.07298,
+      "grad_norm": 1.1090770552512412,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 7298
+    },
+    {
+      "epoch": 0.07299,
+      "grad_norm": 0.9922013741324502,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 7299
+    },
+    {
+      "epoch": 0.073,
+      "grad_norm": 1.0624960804734376,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 7300
+    },
+    {
+      "epoch": 0.07301,
+      "grad_norm": 1.0296175568733714,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 7301
+    },
+    {
+      "epoch": 0.07302,
+      "grad_norm": 0.877285101418735,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 7302
+    },
+    {
+      "epoch": 0.07303,
+      "grad_norm": 0.8404046231538534,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 7303
+    },
+    {
+      "epoch": 0.07304,
+      "grad_norm": 0.7275922913634015,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 7304
+    },
+    {
+      "epoch": 0.07305,
+      "grad_norm": 0.7396081307701995,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 7305
+    },
+    {
+      "epoch": 0.07306,
+      "grad_norm": 0.8600590405301811,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 7306
+    },
+    {
+      "epoch": 0.07307,
+      "grad_norm": 1.2179767907272885,
+      "learning_rate": 0.003,
+      "loss": 4.078,
+      "step": 7307
+    },
+    {
+      "epoch": 0.07308,
+      "grad_norm": 1.2171863102376617,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 7308
+    },
+    {
+      "epoch": 0.07309,
+      "grad_norm": 0.7498319891103764,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 7309
+    },
+    {
+      "epoch": 0.0731,
+      "grad_norm": 0.7626246621724885,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 7310
+    },
+    {
+      "epoch": 0.07311,
+      "grad_norm": 0.9806079646889024,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 7311
+    },
+    {
+      "epoch": 0.07312,
+      "grad_norm": 1.2649393161573326,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 7312
+    },
+    {
+      "epoch": 0.07313,
+      "grad_norm": 1.0944111078253658,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 7313
+    },
+    {
+      "epoch": 0.07314,
+      "grad_norm": 1.0625503664966063,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 7314
+    },
+    {
+      "epoch": 0.07315,
+      "grad_norm": 0.8920266443424979,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 7315
+    },
+    {
+      "epoch": 0.07316,
+      "grad_norm": 0.9411350308589397,
+      "learning_rate": 0.003,
+      "loss": 4.0684,
+      "step": 7316
+    },
+    {
+      "epoch": 0.07317,
+      "grad_norm": 1.0177785717592334,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 7317
+    },
+    {
+      "epoch": 0.07318,
+      "grad_norm": 1.0697061033409336,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 7318
+    },
+    {
+      "epoch": 0.07319,
+      "grad_norm": 1.1560468412807143,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 7319
+    },
+    {
+      "epoch": 0.0732,
+      "grad_norm": 0.9848533350053253,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 7320
+    },
+    {
+      "epoch": 0.07321,
+      "grad_norm": 1.406641344651784,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 7321
+    },
+    {
+      "epoch": 0.07322,
+      "grad_norm": 0.8388161556157484,
+      "learning_rate": 0.003,
+      "loss": 4.0625,
+      "step": 7322
+    },
+    {
+      "epoch": 0.07323,
+      "grad_norm": 0.8844904618419197,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 7323
+    },
+    {
+      "epoch": 0.07324,
+      "grad_norm": 0.9189035571192447,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 7324
+    },
+    {
+      "epoch": 0.07325,
+      "grad_norm": 1.1269111817150788,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 7325
+    },
+    {
+      "epoch": 0.07326,
+      "grad_norm": 1.3034147784515224,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 7326
+    },
+    {
+      "epoch": 0.07327,
+      "grad_norm": 0.9635421663578897,
+      "learning_rate": 0.003,
+      "loss": 4.0684,
+      "step": 7327
+    },
+    {
+      "epoch": 0.07328,
+      "grad_norm": 1.0710552918832759,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 7328
+    },
+    {
+      "epoch": 0.07329,
+      "grad_norm": 0.9097624953128945,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 7329
+    },
+    {
+      "epoch": 0.0733,
+      "grad_norm": 1.0784018937777076,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 7330
+    },
+    {
+      "epoch": 0.07331,
+      "grad_norm": 1.0908726771554587,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 7331
+    },
+    {
+      "epoch": 0.07332,
+      "grad_norm": 0.9641065596068242,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 7332
+    },
+    {
+      "epoch": 0.07333,
+      "grad_norm": 1.056358933350289,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 7333
+    },
+    {
+      "epoch": 0.07334,
+      "grad_norm": 1.0825403734635632,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 7334
+    },
+    {
+      "epoch": 0.07335,
+      "grad_norm": 0.996440123195999,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 7335
+    },
+    {
+      "epoch": 0.07336,
+      "grad_norm": 1.194382823583778,
+      "learning_rate": 0.003,
+      "loss": 4.075,
+      "step": 7336
+    },
+    {
+      "epoch": 0.07337,
+      "grad_norm": 1.0010816804287952,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 7337
+    },
+    {
+      "epoch": 0.07338,
+      "grad_norm": 1.1654307114738909,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 7338
+    },
+    {
+      "epoch": 0.07339,
+      "grad_norm": 1.1220075968794316,
+      "learning_rate": 0.003,
+      "loss": 4.0822,
+      "step": 7339
+    },
+    {
+      "epoch": 0.0734,
+      "grad_norm": 1.131259039339433,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 7340
+    },
+    {
+      "epoch": 0.07341,
+      "grad_norm": 1.175969663125831,
+      "learning_rate": 0.003,
+      "loss": 4.0619,
+      "step": 7341
+    },
+    {
+      "epoch": 0.07342,
+      "grad_norm": 0.9696219162610268,
+      "learning_rate": 0.003,
+      "loss": 4.0739,
+      "step": 7342
+    },
+    {
+      "epoch": 0.07343,
+      "grad_norm": 0.9225181238457246,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 7343
+    },
+    {
+      "epoch": 0.07344,
+      "grad_norm": 0.9472118247787676,
+      "learning_rate": 0.003,
+      "loss": 4.0586,
+      "step": 7344
+    },
+    {
+      "epoch": 0.07345,
+      "grad_norm": 1.0731847840368303,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 7345
+    },
+    {
+      "epoch": 0.07346,
+      "grad_norm": 1.1511686026889996,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 7346
+    },
+    {
+      "epoch": 0.07347,
+      "grad_norm": 0.9402065576595197,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 7347
+    },
+    {
+      "epoch": 0.07348,
+      "grad_norm": 0.9633307339305025,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 7348
+    },
+    {
+      "epoch": 0.07349,
+      "grad_norm": 1.0646374377407537,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 7349
+    },
+    {
+      "epoch": 0.0735,
+      "grad_norm": 1.2040620732152902,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 7350
+    },
+    {
+      "epoch": 0.07351,
+      "grad_norm": 0.904348905876666,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 7351
+    },
+    {
+      "epoch": 0.07352,
+      "grad_norm": 1.1063199597987452,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 7352
+    },
+    {
+      "epoch": 0.07353,
+      "grad_norm": 1.027927831165055,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 7353
+    },
+    {
+      "epoch": 0.07354,
+      "grad_norm": 0.9388874030805077,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 7354
+    },
+    {
+      "epoch": 0.07355,
+      "grad_norm": 0.9846783510750604,
+      "learning_rate": 0.003,
+      "loss": 4.0538,
+      "step": 7355
+    },
+    {
+      "epoch": 0.07356,
+      "grad_norm": 0.9691856508427281,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 7356
+    },
+    {
+      "epoch": 0.07357,
+      "grad_norm": 1.0360248831901298,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 7357
+    },
+    {
+      "epoch": 0.07358,
+      "grad_norm": 0.9624035012433214,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 7358
+    },
+    {
+      "epoch": 0.07359,
+      "grad_norm": 1.2494789612717871,
+      "learning_rate": 0.003,
+      "loss": 4.0634,
+      "step": 7359
+    },
+    {
+      "epoch": 0.0736,
+      "grad_norm": 1.1524537864722764,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 7360
+    },
+    {
+      "epoch": 0.07361,
+      "grad_norm": 1.0340592810304519,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 7361
+    },
+    {
+      "epoch": 0.07362,
+      "grad_norm": 0.8881109632693943,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 7362
+    },
+    {
+      "epoch": 0.07363,
+      "grad_norm": 1.0007719234565362,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 7363
+    },
+    {
+      "epoch": 0.07364,
+      "grad_norm": 1.1774673252784194,
+      "learning_rate": 0.003,
+      "loss": 4.0724,
+      "step": 7364
+    },
+    {
+      "epoch": 0.07365,
+      "grad_norm": 1.0952657927274845,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 7365
+    },
+    {
+      "epoch": 0.07366,
+      "grad_norm": 1.1457353470828615,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 7366
+    },
+    {
+      "epoch": 0.07367,
+      "grad_norm": 1.068208594063675,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 7367
+    },
+    {
+      "epoch": 0.07368,
+      "grad_norm": 1.1209873829364174,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 7368
+    },
+    {
+      "epoch": 0.07369,
+      "grad_norm": 0.9611042252278179,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 7369
+    },
+    {
+      "epoch": 0.0737,
+      "grad_norm": 1.1681489840918342,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 7370
+    },
+    {
+      "epoch": 0.07371,
+      "grad_norm": 1.1438687370427696,
+      "learning_rate": 0.003,
+      "loss": 4.0825,
+      "step": 7371
+    },
+    {
+      "epoch": 0.07372,
+      "grad_norm": 1.1163570815698207,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 7372
+    },
+    {
+      "epoch": 0.07373,
+      "grad_norm": 0.994703138600529,
+      "learning_rate": 0.003,
+      "loss": 4.0623,
+      "step": 7373
+    },
+    {
+      "epoch": 0.07374,
+      "grad_norm": 1.0487042099617259,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 7374
+    },
+    {
+      "epoch": 0.07375,
+      "grad_norm": 0.9349117742647232,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 7375
+    },
+    {
+      "epoch": 0.07376,
+      "grad_norm": 0.8520163703396579,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 7376
+    },
+    {
+      "epoch": 0.07377,
+      "grad_norm": 1.0456006728388636,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 7377
+    },
+    {
+      "epoch": 0.07378,
+      "grad_norm": 1.1182333493411414,
+      "learning_rate": 0.003,
+      "loss": 4.0673,
+      "step": 7378
+    },
+    {
+      "epoch": 0.07379,
+      "grad_norm": 1.055202759953318,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 7379
+    },
+    {
+      "epoch": 0.0738,
+      "grad_norm": 0.9844943498588187,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 7380
+    },
+    {
+      "epoch": 0.07381,
+      "grad_norm": 1.0556758756646913,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 7381
+    },
+    {
+      "epoch": 0.07382,
+      "grad_norm": 0.939993493011405,
+      "learning_rate": 0.003,
+      "loss": 4.0813,
+      "step": 7382
+    },
+    {
+      "epoch": 0.07383,
+      "grad_norm": 0.8597491485943202,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 7383
+    },
+    {
+      "epoch": 0.07384,
+      "grad_norm": 0.921647665125338,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 7384
+    },
+    {
+      "epoch": 0.07385,
+      "grad_norm": 1.2812955999677622,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 7385
+    },
+    {
+      "epoch": 0.07386,
+      "grad_norm": 0.993685614599893,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 7386
+    },
+    {
+      "epoch": 0.07387,
+      "grad_norm": 1.027709481484533,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 7387
+    },
+    {
+      "epoch": 0.07388,
+      "grad_norm": 0.9013082495547424,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 7388
+    },
+    {
+      "epoch": 0.07389,
+      "grad_norm": 0.9109951838256976,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 7389
+    },
+    {
+      "epoch": 0.0739,
+      "grad_norm": 1.2343749977944833,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 7390
+    },
+    {
+      "epoch": 0.07391,
+      "grad_norm": 0.9658322691066191,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 7391
+    },
+    {
+      "epoch": 0.07392,
+      "grad_norm": 1.1277581857498538,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 7392
+    },
+    {
+      "epoch": 0.07393,
+      "grad_norm": 1.0979017569203031,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 7393
+    },
+    {
+      "epoch": 0.07394,
+      "grad_norm": 1.1170278163443845,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 7394
+    },
+    {
+      "epoch": 0.07395,
+      "grad_norm": 1.0993990391688724,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 7395
+    },
+    {
+      "epoch": 0.07396,
+      "grad_norm": 1.0848153750483966,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 7396
+    },
+    {
+      "epoch": 0.07397,
+      "grad_norm": 1.1759764800565051,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 7397
+    },
+    {
+      "epoch": 0.07398,
+      "grad_norm": 0.9099184176925739,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 7398
+    },
+    {
+      "epoch": 0.07399,
+      "grad_norm": 0.9851023987142208,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 7399
+    },
+    {
+      "epoch": 0.074,
+      "grad_norm": 1.216378322428975,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 7400
+    },
+    {
+      "epoch": 0.07401,
+      "grad_norm": 0.880119789550399,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 7401
+    },
+    {
+      "epoch": 0.07402,
+      "grad_norm": 0.9513679960235595,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 7402
+    },
+    {
+      "epoch": 0.07403,
+      "grad_norm": 1.1554462432688264,
+      "learning_rate": 0.003,
+      "loss": 4.0625,
+      "step": 7403
+    },
+    {
+      "epoch": 0.07404,
+      "grad_norm": 1.1015044417016977,
+      "learning_rate": 0.003,
+      "loss": 4.0921,
+      "step": 7404
+    },
+    {
+      "epoch": 0.07405,
+      "grad_norm": 1.3070390701211214,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 7405
+    },
+    {
+      "epoch": 0.07406,
+      "grad_norm": 0.9272149699930327,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 7406
+    },
+    {
+      "epoch": 0.07407,
+      "grad_norm": 1.0566499622445449,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 7407
+    },
+    {
+      "epoch": 0.07408,
+      "grad_norm": 0.9492068898970054,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 7408
+    },
+    {
+      "epoch": 0.07409,
+      "grad_norm": 0.9649105766310203,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 7409
+    },
+    {
+      "epoch": 0.0741,
+      "grad_norm": 1.1043798243043246,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 7410
+    },
+    {
+      "epoch": 0.07411,
+      "grad_norm": 1.1607113803566775,
+      "learning_rate": 0.003,
+      "loss": 4.0634,
+      "step": 7411
+    },
+    {
+      "epoch": 0.07412,
+      "grad_norm": 0.9327351376075126,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 7412
+    },
+    {
+      "epoch": 0.07413,
+      "grad_norm": 1.0657239310703015,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 7413
+    },
+    {
+      "epoch": 0.07414,
+      "grad_norm": 1.0370445327105178,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 7414
+    },
+    {
+      "epoch": 0.07415,
+      "grad_norm": 1.1291501330975042,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 7415
+    },
+    {
+      "epoch": 0.07416,
+      "grad_norm": 1.0563748188952578,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 7416
+    },
+    {
+      "epoch": 0.07417,
+      "grad_norm": 0.9426313292975299,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 7417
+    },
+    {
+      "epoch": 0.07418,
+      "grad_norm": 1.138322949259601,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 7418
+    },
+    {
+      "epoch": 0.07419,
+      "grad_norm": 1.213845231951027,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 7419
+    },
+    {
+      "epoch": 0.0742,
+      "grad_norm": 0.9160208863216656,
+      "learning_rate": 0.003,
+      "loss": 4.0611,
+      "step": 7420
+    },
+    {
+      "epoch": 0.07421,
+      "grad_norm": 0.9459537396238892,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 7421
+    },
+    {
+      "epoch": 0.07422,
+      "grad_norm": 0.9896087479545597,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 7422
+    },
+    {
+      "epoch": 0.07423,
+      "grad_norm": 1.029001637107005,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 7423
+    },
+    {
+      "epoch": 0.07424,
+      "grad_norm": 0.966524850109166,
+      "learning_rate": 0.003,
+      "loss": 4.0588,
+      "step": 7424
+    },
+    {
+      "epoch": 0.07425,
+      "grad_norm": 1.0858257368928335,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 7425
+    },
+    {
+      "epoch": 0.07426,
+      "grad_norm": 0.9810888238304923,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 7426
+    },
+    {
+      "epoch": 0.07427,
+      "grad_norm": 1.1559445528412031,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 7427
+    },
+    {
+      "epoch": 0.07428,
+      "grad_norm": 1.1236073459063627,
+      "learning_rate": 0.003,
+      "loss": 4.0835,
+      "step": 7428
+    },
+    {
+      "epoch": 0.07429,
+      "grad_norm": 1.096676807770383,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 7429
+    },
+    {
+      "epoch": 0.0743,
+      "grad_norm": 1.0713521198381804,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 7430
+    },
+    {
+      "epoch": 0.07431,
+      "grad_norm": 1.0003185699822545,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 7431
+    },
+    {
+      "epoch": 0.07432,
+      "grad_norm": 1.242346832405972,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 7432
+    },
+    {
+      "epoch": 0.07433,
+      "grad_norm": 1.0630507457181497,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 7433
+    },
+    {
+      "epoch": 0.07434,
+      "grad_norm": 1.1654066987813088,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 7434
+    },
+    {
+      "epoch": 0.07435,
+      "grad_norm": 1.2683692736396797,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 7435
+    },
+    {
+      "epoch": 0.07436,
+      "grad_norm": 0.945218194969709,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 7436
+    },
+    {
+      "epoch": 0.07437,
+      "grad_norm": 1.097019618654518,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 7437
+    },
+    {
+      "epoch": 0.07438,
+      "grad_norm": 1.2074577366475394,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 7438
+    },
+    {
+      "epoch": 0.07439,
+      "grad_norm": 0.9431501487823388,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 7439
+    },
+    {
+      "epoch": 0.0744,
+      "grad_norm": 1.1411961430256892,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 7440
+    },
+    {
+      "epoch": 0.07441,
+      "grad_norm": 0.945382643731931,
+      "learning_rate": 0.003,
+      "loss": 4.0836,
+      "step": 7441
+    },
+    {
+      "epoch": 0.07442,
+      "grad_norm": 0.9584211239670143,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 7442
+    },
+    {
+      "epoch": 0.07443,
+      "grad_norm": 0.8936679140385173,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 7443
+    },
+    {
+      "epoch": 0.07444,
+      "grad_norm": 0.8616182361179869,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 7444
+    },
+    {
+      "epoch": 0.07445,
+      "grad_norm": 1.112391031977807,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 7445
+    },
+    {
+      "epoch": 0.07446,
+      "grad_norm": 1.260904358901806,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 7446
+    },
+    {
+      "epoch": 0.07447,
+      "grad_norm": 0.9143344109603971,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 7447
+    },
+    {
+      "epoch": 0.07448,
+      "grad_norm": 0.9777877903133408,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 7448
+    },
+    {
+      "epoch": 0.07449,
+      "grad_norm": 1.0092330290166718,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 7449
+    },
+    {
+      "epoch": 0.0745,
+      "grad_norm": 1.116824586523927,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 7450
+    },
+    {
+      "epoch": 0.07451,
+      "grad_norm": 1.0158873706838591,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 7451
+    },
+    {
+      "epoch": 0.07452,
+      "grad_norm": 1.0683848996605232,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 7452
+    },
+    {
+      "epoch": 0.07453,
+      "grad_norm": 1.0005773823548063,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 7453
+    },
+    {
+      "epoch": 0.07454,
+      "grad_norm": 1.1094763576693252,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 7454
+    },
+    {
+      "epoch": 0.07455,
+      "grad_norm": 1.0977415415623035,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 7455
+    },
+    {
+      "epoch": 0.07456,
+      "grad_norm": 1.2070613083727133,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 7456
+    },
+    {
+      "epoch": 0.07457,
+      "grad_norm": 1.033669789154854,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 7457
+    },
+    {
+      "epoch": 0.07458,
+      "grad_norm": 0.9974734919174871,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 7458
+    },
+    {
+      "epoch": 0.07459,
+      "grad_norm": 1.1596224019873076,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 7459
+    },
+    {
+      "epoch": 0.0746,
+      "grad_norm": 1.0558735224515694,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 7460
+    },
+    {
+      "epoch": 0.07461,
+      "grad_norm": 1.1013658395166883,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 7461
+    },
+    {
+      "epoch": 0.07462,
+      "grad_norm": 1.099153298296693,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 7462
+    },
+    {
+      "epoch": 0.07463,
+      "grad_norm": 0.9155991610369465,
+      "learning_rate": 0.003,
+      "loss": 4.0778,
+      "step": 7463
+    },
+    {
+      "epoch": 0.07464,
+      "grad_norm": 0.9379208043653106,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 7464
+    },
+    {
+      "epoch": 0.07465,
+      "grad_norm": 1.0372587578155243,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 7465
+    },
+    {
+      "epoch": 0.07466,
+      "grad_norm": 1.3132084128992856,
+      "learning_rate": 0.003,
+      "loss": 4.0717,
+      "step": 7466
+    },
+    {
+      "epoch": 0.07467,
+      "grad_norm": 0.933995203349761,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 7467
+    },
+    {
+      "epoch": 0.07468,
+      "grad_norm": 1.0967227098443246,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 7468
+    },
+    {
+      "epoch": 0.07469,
+      "grad_norm": 0.8994513242913562,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 7469
+    },
+    {
+      "epoch": 0.0747,
+      "grad_norm": 0.8582057247678115,
+      "learning_rate": 0.003,
+      "loss": 4.0644,
+      "step": 7470
+    },
+    {
+      "epoch": 0.07471,
+      "grad_norm": 0.9750254675432543,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 7471
+    },
+    {
+      "epoch": 0.07472,
+      "grad_norm": 1.0758291745603603,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 7472
+    },
+    {
+      "epoch": 0.07473,
+      "grad_norm": 1.2870897431186328,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 7473
+    },
+    {
+      "epoch": 0.07474,
+      "grad_norm": 0.8579070231808928,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 7474
+    },
+    {
+      "epoch": 0.07475,
+      "grad_norm": 0.9738156550036234,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 7475
+    },
+    {
+      "epoch": 0.07476,
+      "grad_norm": 1.045108915912123,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 7476
+    },
+    {
+      "epoch": 0.07477,
+      "grad_norm": 1.094542247261385,
+      "learning_rate": 0.003,
+      "loss": 4.0652,
+      "step": 7477
+    },
+    {
+      "epoch": 0.07478,
+      "grad_norm": 0.8629572990188785,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 7478
+    },
+    {
+      "epoch": 0.07479,
+      "grad_norm": 0.9292854361264181,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 7479
+    },
+    {
+      "epoch": 0.0748,
+      "grad_norm": 1.0705526760945827,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 7480
+    },
+    {
+      "epoch": 0.07481,
+      "grad_norm": 1.0359796271584742,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 7481
+    },
+    {
+      "epoch": 0.07482,
+      "grad_norm": 1.232770848270821,
+      "learning_rate": 0.003,
+      "loss": 4.0597,
+      "step": 7482
+    },
+    {
+      "epoch": 0.07483,
+      "grad_norm": 1.0817607398369629,
+      "learning_rate": 0.003,
+      "loss": 4.0749,
+      "step": 7483
+    },
+    {
+      "epoch": 0.07484,
+      "grad_norm": 1.2858180984611243,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 7484
+    },
+    {
+      "epoch": 0.07485,
+      "grad_norm": 0.9867732155173473,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 7485
+    },
+    {
+      "epoch": 0.07486,
+      "grad_norm": 1.0647719783345513,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 7486
+    },
+    {
+      "epoch": 0.07487,
+      "grad_norm": 0.9563519763714589,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 7487
+    },
+    {
+      "epoch": 0.07488,
+      "grad_norm": 1.081168855348119,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 7488
+    },
+    {
+      "epoch": 0.07489,
+      "grad_norm": 0.9057881307635063,
+      "learning_rate": 0.003,
+      "loss": 4.0612,
+      "step": 7489
+    },
+    {
+      "epoch": 0.0749,
+      "grad_norm": 0.941257228213405,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 7490
+    },
+    {
+      "epoch": 0.07491,
+      "grad_norm": 1.023677883109361,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 7491
+    },
+    {
+      "epoch": 0.07492,
+      "grad_norm": 1.0985843828113449,
+      "learning_rate": 0.003,
+      "loss": 4.0665,
+      "step": 7492
+    },
+    {
+      "epoch": 0.07493,
+      "grad_norm": 1.1291894542659202,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 7493
+    },
+    {
+      "epoch": 0.07494,
+      "grad_norm": 1.046782915344204,
+      "learning_rate": 0.003,
+      "loss": 4.0712,
+      "step": 7494
+    },
+    {
+      "epoch": 0.07495,
+      "grad_norm": 1.2154732983788594,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 7495
+    },
+    {
+      "epoch": 0.07496,
+      "grad_norm": 1.1000284508004632,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 7496
+    },
+    {
+      "epoch": 0.07497,
+      "grad_norm": 1.1040574207112384,
+      "learning_rate": 0.003,
+      "loss": 4.0549,
+      "step": 7497
+    },
+    {
+      "epoch": 0.07498,
+      "grad_norm": 0.8948022242164695,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 7498
+    },
+    {
+      "epoch": 0.07499,
+      "grad_norm": 1.0258546162106021,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 7499
+    },
+    {
+      "epoch": 0.075,
+      "grad_norm": 1.1158118517557325,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 7500
+    },
+    {
+      "epoch": 0.07501,
+      "grad_norm": 1.0543780189380676,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 7501
+    },
+    {
+      "epoch": 0.07502,
+      "grad_norm": 1.1398746708324812,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 7502
+    },
+    {
+      "epoch": 0.07503,
+      "grad_norm": 1.17233757471888,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 7503
+    },
+    {
+      "epoch": 0.07504,
+      "grad_norm": 1.1747348217017677,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 7504
+    },
+    {
+      "epoch": 0.07505,
+      "grad_norm": 0.8995993583616543,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 7505
+    },
+    {
+      "epoch": 0.07506,
+      "grad_norm": 0.9337832043650974,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 7506
+    },
+    {
+      "epoch": 0.07507,
+      "grad_norm": 0.9695999584916757,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 7507
+    },
+    {
+      "epoch": 0.07508,
+      "grad_norm": 1.1638077160586862,
+      "learning_rate": 0.003,
+      "loss": 4.0683,
+      "step": 7508
+    },
+    {
+      "epoch": 0.07509,
+      "grad_norm": 0.9874287876840504,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 7509
+    },
+    {
+      "epoch": 0.0751,
+      "grad_norm": 0.9776311060010476,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 7510
+    },
+    {
+      "epoch": 0.07511,
+      "grad_norm": 1.1033486233571166,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 7511
+    },
+    {
+      "epoch": 0.07512,
+      "grad_norm": 0.8907632335623773,
+      "learning_rate": 0.003,
+      "loss": 4.0696,
+      "step": 7512
+    },
+    {
+      "epoch": 0.07513,
+      "grad_norm": 0.9988954487137791,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 7513
+    },
+    {
+      "epoch": 0.07514,
+      "grad_norm": 1.194395962502461,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 7514
+    },
+    {
+      "epoch": 0.07515,
+      "grad_norm": 1.156162880522299,
+      "learning_rate": 0.003,
+      "loss": 4.0773,
+      "step": 7515
+    },
+    {
+      "epoch": 0.07516,
+      "grad_norm": 1.2727170425623708,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 7516
+    },
+    {
+      "epoch": 0.07517,
+      "grad_norm": 1.1565889833590228,
+      "learning_rate": 0.003,
+      "loss": 4.0575,
+      "step": 7517
+    },
+    {
+      "epoch": 0.07518,
+      "grad_norm": 0.8708468314319205,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 7518
+    },
+    {
+      "epoch": 0.07519,
+      "grad_norm": 0.9953433252661223,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 7519
+    },
+    {
+      "epoch": 0.0752,
+      "grad_norm": 1.0940459643263882,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 7520
+    },
+    {
+      "epoch": 0.07521,
+      "grad_norm": 0.928716112619678,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 7521
+    },
+    {
+      "epoch": 0.07522,
+      "grad_norm": 0.9818555888838302,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 7522
+    },
+    {
+      "epoch": 0.07523,
+      "grad_norm": 1.2068474109803409,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 7523
+    },
+    {
+      "epoch": 0.07524,
+      "grad_norm": 1.01597504610466,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 7524
+    },
+    {
+      "epoch": 0.07525,
+      "grad_norm": 1.2715734649300354,
+      "learning_rate": 0.003,
+      "loss": 4.0756,
+      "step": 7525
+    },
+    {
+      "epoch": 0.07526,
+      "grad_norm": 0.7870088972562932,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 7526
+    },
+    {
+      "epoch": 0.07527,
+      "grad_norm": 0.9106993971561816,
+      "learning_rate": 0.003,
+      "loss": 4.0652,
+      "step": 7527
+    },
+    {
+      "epoch": 0.07528,
+      "grad_norm": 0.8636142458933644,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 7528
+    },
+    {
+      "epoch": 0.07529,
+      "grad_norm": 0.9616268692995935,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 7529
+    },
+    {
+      "epoch": 0.0753,
+      "grad_norm": 1.114538018638255,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 7530
+    },
+    {
+      "epoch": 0.07531,
+      "grad_norm": 0.9879931527488544,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 7531
+    },
+    {
+      "epoch": 0.07532,
+      "grad_norm": 1.0655313216663054,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 7532
+    },
+    {
+      "epoch": 0.07533,
+      "grad_norm": 0.8780149908265403,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 7533
+    },
+    {
+      "epoch": 0.07534,
+      "grad_norm": 0.8784040460131582,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 7534
+    },
+    {
+      "epoch": 0.07535,
+      "grad_norm": 0.8518380464505314,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 7535
+    },
+    {
+      "epoch": 0.07536,
+      "grad_norm": 0.9098902162790059,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 7536
+    },
+    {
+      "epoch": 0.07537,
+      "grad_norm": 1.143562497559037,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 7537
+    },
+    {
+      "epoch": 0.07538,
+      "grad_norm": 1.2254729496457664,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 7538
+    },
+    {
+      "epoch": 0.07539,
+      "grad_norm": 1.043842367086055,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 7539
+    },
+    {
+      "epoch": 0.0754,
+      "grad_norm": 1.21725440776156,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 7540
+    },
+    {
+      "epoch": 0.07541,
+      "grad_norm": 0.9295788359152493,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 7541
+    },
+    {
+      "epoch": 0.07542,
+      "grad_norm": 1.130473398219038,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 7542
+    },
+    {
+      "epoch": 0.07543,
+      "grad_norm": 0.9937582722105303,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 7543
+    },
+    {
+      "epoch": 0.07544,
+      "grad_norm": 1.2560834000797938,
+      "learning_rate": 0.003,
+      "loss": 4.0612,
+      "step": 7544
+    },
+    {
+      "epoch": 0.07545,
+      "grad_norm": 1.0050728570569878,
+      "learning_rate": 0.003,
+      "loss": 4.0808,
+      "step": 7545
+    },
+    {
+      "epoch": 0.07546,
+      "grad_norm": 1.0720793211640665,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 7546
+    },
+    {
+      "epoch": 0.07547,
+      "grad_norm": 0.9113278300880503,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 7547
+    },
+    {
+      "epoch": 0.07548,
+      "grad_norm": 0.7765738432773017,
+      "learning_rate": 0.003,
+      "loss": 4.0671,
+      "step": 7548
+    },
+    {
+      "epoch": 0.07549,
+      "grad_norm": 0.912954245340519,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 7549
+    },
+    {
+      "epoch": 0.0755,
+      "grad_norm": 1.1357200728898464,
+      "learning_rate": 0.003,
+      "loss": 4.0709,
+      "step": 7550
+    },
+    {
+      "epoch": 0.07551,
+      "grad_norm": 1.1645892480065512,
+      "learning_rate": 0.003,
+      "loss": 4.06,
+      "step": 7551
+    },
+    {
+      "epoch": 0.07552,
+      "grad_norm": 1.0759538967371787,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 7552
+    },
+    {
+      "epoch": 0.07553,
+      "grad_norm": 1.113298651168989,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 7553
+    },
+    {
+      "epoch": 0.07554,
+      "grad_norm": 1.3129530629673276,
+      "learning_rate": 0.003,
+      "loss": 4.0665,
+      "step": 7554
+    },
+    {
+      "epoch": 0.07555,
+      "grad_norm": 1.0195391886187795,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 7555
+    },
+    {
+      "epoch": 0.07556,
+      "grad_norm": 1.1261424419589618,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 7556
+    },
+    {
+      "epoch": 0.07557,
+      "grad_norm": 0.9078457564330181,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 7557
+    },
+    {
+      "epoch": 0.07558,
+      "grad_norm": 1.1105037206557775,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 7558
+    },
+    {
+      "epoch": 0.07559,
+      "grad_norm": 1.084878893896586,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 7559
+    },
+    {
+      "epoch": 0.0756,
+      "grad_norm": 0.9422492493376851,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 7560
+    },
+    {
+      "epoch": 0.07561,
+      "grad_norm": 0.9472473971962481,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 7561
+    },
+    {
+      "epoch": 0.07562,
+      "grad_norm": 1.0141863361647219,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 7562
+    },
+    {
+      "epoch": 0.07563,
+      "grad_norm": 1.209943999383626,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 7563
+    },
+    {
+      "epoch": 0.07564,
+      "grad_norm": 1.040474950950908,
+      "learning_rate": 0.003,
+      "loss": 4.0709,
+      "step": 7564
+    },
+    {
+      "epoch": 0.07565,
+      "grad_norm": 1.041564684801342,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 7565
+    },
+    {
+      "epoch": 0.07566,
+      "grad_norm": 0.9211223111374635,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 7566
+    },
+    {
+      "epoch": 0.07567,
+      "grad_norm": 0.9557313493057552,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 7567
+    },
+    {
+      "epoch": 0.07568,
+      "grad_norm": 1.0157216311700132,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 7568
+    },
+    {
+      "epoch": 0.07569,
+      "grad_norm": 1.0891520113381792,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 7569
+    },
+    {
+      "epoch": 0.0757,
+      "grad_norm": 1.035599436574519,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 7570
+    },
+    {
+      "epoch": 0.07571,
+      "grad_norm": 1.1416705208329894,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 7571
+    },
+    {
+      "epoch": 0.07572,
+      "grad_norm": 1.1983465038651324,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 7572
+    },
+    {
+      "epoch": 0.07573,
+      "grad_norm": 0.8376450895449468,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 7573
+    },
+    {
+      "epoch": 0.07574,
+      "grad_norm": 0.7440289348204431,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 7574
+    },
+    {
+      "epoch": 0.07575,
+      "grad_norm": 0.6603077120225412,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 7575
+    },
+    {
+      "epoch": 0.07576,
+      "grad_norm": 0.7920785949672058,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 7576
+    },
+    {
+      "epoch": 0.07577,
+      "grad_norm": 1.0268729068383955,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 7577
+    },
+    {
+      "epoch": 0.07578,
+      "grad_norm": 1.1368625436467688,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 7578
+    },
+    {
+      "epoch": 0.07579,
+      "grad_norm": 1.008982576200628,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 7579
+    },
+    {
+      "epoch": 0.0758,
+      "grad_norm": 1.3316307903450357,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 7580
+    },
+    {
+      "epoch": 0.07581,
+      "grad_norm": 0.9546023266380341,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 7581
+    },
+    {
+      "epoch": 0.07582,
+      "grad_norm": 1.028351315894607,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 7582
+    },
+    {
+      "epoch": 0.07583,
+      "grad_norm": 1.1084447205745265,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 7583
+    },
+    {
+      "epoch": 0.07584,
+      "grad_norm": 1.083029995624086,
+      "learning_rate": 0.003,
+      "loss": 4.0887,
+      "step": 7584
+    },
+    {
+      "epoch": 0.07585,
+      "grad_norm": 1.0956918067982808,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 7585
+    },
+    {
+      "epoch": 0.07586,
+      "grad_norm": 1.0704482847092711,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 7586
+    },
+    {
+      "epoch": 0.07587,
+      "grad_norm": 1.0248155370988479,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 7587
+    },
+    {
+      "epoch": 0.07588,
+      "grad_norm": 0.9469007252567827,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 7588
+    },
+    {
+      "epoch": 0.07589,
+      "grad_norm": 0.9083602594004783,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 7589
+    },
+    {
+      "epoch": 0.0759,
+      "grad_norm": 1.0888081212751122,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 7590
+    },
+    {
+      "epoch": 0.07591,
+      "grad_norm": 1.1286869611060864,
+      "learning_rate": 0.003,
+      "loss": 4.0602,
+      "step": 7591
+    },
+    {
+      "epoch": 0.07592,
+      "grad_norm": 1.1994073883546317,
+      "learning_rate": 0.003,
+      "loss": 4.0632,
+      "step": 7592
+    },
+    {
+      "epoch": 0.07593,
+      "grad_norm": 0.930854156102424,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 7593
+    },
+    {
+      "epoch": 0.07594,
+      "grad_norm": 0.9949944707269984,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 7594
+    },
+    {
+      "epoch": 0.07595,
+      "grad_norm": 1.0566054746262994,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 7595
+    },
+    {
+      "epoch": 0.07596,
+      "grad_norm": 0.9893650668367953,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 7596
+    },
+    {
+      "epoch": 0.07597,
+      "grad_norm": 1.1346833349811905,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 7597
+    },
+    {
+      "epoch": 0.07598,
+      "grad_norm": 1.0292720315909252,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 7598
+    },
+    {
+      "epoch": 0.07599,
+      "grad_norm": 1.152689224088955,
+      "learning_rate": 0.003,
+      "loss": 4.0663,
+      "step": 7599
+    },
+    {
+      "epoch": 0.076,
+      "grad_norm": 1.0383151228598986,
+      "learning_rate": 0.003,
+      "loss": 4.0544,
+      "step": 7600
+    },
+    {
+      "epoch": 0.07601,
+      "grad_norm": 1.047117789348393,
+      "learning_rate": 0.003,
+      "loss": 4.0714,
+      "step": 7601
+    },
+    {
+      "epoch": 0.07602,
+      "grad_norm": 1.106052696242171,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 7602
+    },
+    {
+      "epoch": 0.07603,
+      "grad_norm": 1.0073145436596014,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 7603
+    },
+    {
+      "epoch": 0.07604,
+      "grad_norm": 1.0664230896712803,
+      "learning_rate": 0.003,
+      "loss": 4.0586,
+      "step": 7604
+    },
+    {
+      "epoch": 0.07605,
+      "grad_norm": 0.9753888567376868,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 7605
+    },
+    {
+      "epoch": 0.07606,
+      "grad_norm": 1.2030307927813533,
+      "learning_rate": 0.003,
+      "loss": 4.1116,
+      "step": 7606
+    },
+    {
+      "epoch": 0.07607,
+      "grad_norm": 0.9086575357118534,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 7607
+    },
+    {
+      "epoch": 0.07608,
+      "grad_norm": 0.9273700226274223,
+      "learning_rate": 0.003,
+      "loss": 4.074,
+      "step": 7608
+    },
+    {
+      "epoch": 0.07609,
+      "grad_norm": 1.003689493135648,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 7609
+    },
+    {
+      "epoch": 0.0761,
+      "grad_norm": 1.1377269625068565,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 7610
+    },
+    {
+      "epoch": 0.07611,
+      "grad_norm": 0.9384559805000248,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 7611
+    },
+    {
+      "epoch": 0.07612,
+      "grad_norm": 1.1271298131099274,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 7612
+    },
+    {
+      "epoch": 0.07613,
+      "grad_norm": 1.1197631113115567,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 7613
+    },
+    {
+      "epoch": 0.07614,
+      "grad_norm": 1.0895475603662517,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 7614
+    },
+    {
+      "epoch": 0.07615,
+      "grad_norm": 1.1379640727683638,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 7615
+    },
+    {
+      "epoch": 0.07616,
+      "grad_norm": 1.0509591762451105,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 7616
+    },
+    {
+      "epoch": 0.07617,
+      "grad_norm": 1.0514288577627249,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 7617
+    },
+    {
+      "epoch": 0.07618,
+      "grad_norm": 0.9463880932847941,
+      "learning_rate": 0.003,
+      "loss": 4.0788,
+      "step": 7618
+    },
+    {
+      "epoch": 0.07619,
+      "grad_norm": 1.0151178915603503,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 7619
+    },
+    {
+      "epoch": 0.0762,
+      "grad_norm": 1.2638113293346065,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 7620
+    },
+    {
+      "epoch": 0.07621,
+      "grad_norm": 0.8743906611118283,
+      "learning_rate": 0.003,
+      "loss": 4.0612,
+      "step": 7621
+    },
+    {
+      "epoch": 0.07622,
+      "grad_norm": 0.9376371308176443,
+      "learning_rate": 0.003,
+      "loss": 4.0617,
+      "step": 7622
+    },
+    {
+      "epoch": 0.07623,
+      "grad_norm": 1.2162047119192658,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 7623
+    },
+    {
+      "epoch": 0.07624,
+      "grad_norm": 0.8081909472078085,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 7624
+    },
+    {
+      "epoch": 0.07625,
+      "grad_norm": 0.9250877782038973,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 7625
+    },
+    {
+      "epoch": 0.07626,
+      "grad_norm": 1.105895370815446,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 7626
+    },
+    {
+      "epoch": 0.07627,
+      "grad_norm": 1.0603656758381923,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 7627
+    },
+    {
+      "epoch": 0.07628,
+      "grad_norm": 1.2118848294290039,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 7628
+    },
+    {
+      "epoch": 0.07629,
+      "grad_norm": 0.9491546824024952,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 7629
+    },
+    {
+      "epoch": 0.0763,
+      "grad_norm": 1.1470367440434606,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 7630
+    },
+    {
+      "epoch": 0.07631,
+      "grad_norm": 0.9750737788207199,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 7631
+    },
+    {
+      "epoch": 0.07632,
+      "grad_norm": 0.9223121244351081,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 7632
+    },
+    {
+      "epoch": 0.07633,
+      "grad_norm": 0.9447939771060309,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 7633
+    },
+    {
+      "epoch": 0.07634,
+      "grad_norm": 1.0656262121811246,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 7634
+    },
+    {
+      "epoch": 0.07635,
+      "grad_norm": 1.1293493421594012,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 7635
+    },
+    {
+      "epoch": 0.07636,
+      "grad_norm": 1.1936557881689314,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 7636
+    },
+    {
+      "epoch": 0.07637,
+      "grad_norm": 0.9929307055596185,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 7637
+    },
+    {
+      "epoch": 0.07638,
+      "grad_norm": 1.0228636969536977,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 7638
+    },
+    {
+      "epoch": 0.07639,
+      "grad_norm": 0.9823441975383835,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 7639
+    },
+    {
+      "epoch": 0.0764,
+      "grad_norm": 1.1338997019966783,
+      "learning_rate": 0.003,
+      "loss": 4.0806,
+      "step": 7640
+    },
+    {
+      "epoch": 0.07641,
+      "grad_norm": 1.0093913734568616,
+      "learning_rate": 0.003,
+      "loss": 4.0741,
+      "step": 7641
+    },
+    {
+      "epoch": 0.07642,
+      "grad_norm": 1.125461302352058,
+      "learning_rate": 0.003,
+      "loss": 4.0679,
+      "step": 7642
+    },
+    {
+      "epoch": 0.07643,
+      "grad_norm": 1.0514677154433425,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 7643
+    },
+    {
+      "epoch": 0.07644,
+      "grad_norm": 1.0553593637554315,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 7644
+    },
+    {
+      "epoch": 0.07645,
+      "grad_norm": 0.9805977399864129,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 7645
+    },
+    {
+      "epoch": 0.07646,
+      "grad_norm": 1.0299202945868386,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 7646
+    },
+    {
+      "epoch": 0.07647,
+      "grad_norm": 1.3176337185067677,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 7647
+    },
+    {
+      "epoch": 0.07648,
+      "grad_norm": 1.1005654564479557,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 7648
+    },
+    {
+      "epoch": 0.07649,
+      "grad_norm": 1.0905324219896768,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 7649
+    },
+    {
+      "epoch": 0.0765,
+      "grad_norm": 0.8802249156939694,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 7650
+    },
+    {
+      "epoch": 0.07651,
+      "grad_norm": 0.9072009257031926,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 7651
+    },
+    {
+      "epoch": 0.07652,
+      "grad_norm": 0.9346164551384647,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 7652
+    },
+    {
+      "epoch": 0.07653,
+      "grad_norm": 0.980298089231196,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 7653
+    },
+    {
+      "epoch": 0.07654,
+      "grad_norm": 1.0968929008117443,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 7654
+    },
+    {
+      "epoch": 0.07655,
+      "grad_norm": 0.9601657405610515,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 7655
+    },
+    {
+      "epoch": 0.07656,
+      "grad_norm": 1.258852527855847,
+      "learning_rate": 0.003,
+      "loss": 4.0714,
+      "step": 7656
+    },
+    {
+      "epoch": 0.07657,
+      "grad_norm": 0.8924445280055346,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 7657
+    },
+    {
+      "epoch": 0.07658,
+      "grad_norm": 1.016577242205408,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 7658
+    },
+    {
+      "epoch": 0.07659,
+      "grad_norm": 1.1676547370839572,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 7659
+    },
+    {
+      "epoch": 0.0766,
+      "grad_norm": 1.0842253182708932,
+      "learning_rate": 0.003,
+      "loss": 4.0864,
+      "step": 7660
+    },
+    {
+      "epoch": 0.07661,
+      "grad_norm": 1.233560763252066,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 7661
+    },
+    {
+      "epoch": 0.07662,
+      "grad_norm": 1.0033408895801803,
+      "learning_rate": 0.003,
+      "loss": 4.0731,
+      "step": 7662
+    },
+    {
+      "epoch": 0.07663,
+      "grad_norm": 1.2643790027991915,
+      "learning_rate": 0.003,
+      "loss": 4.0749,
+      "step": 7663
+    },
+    {
+      "epoch": 0.07664,
+      "grad_norm": 1.023325848782995,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 7664
+    },
+    {
+      "epoch": 0.07665,
+      "grad_norm": 1.0799510581720673,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 7665
+    },
+    {
+      "epoch": 0.07666,
+      "grad_norm": 1.108909756809187,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 7666
+    },
+    {
+      "epoch": 0.07667,
+      "grad_norm": 0.8955114573010688,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 7667
+    },
+    {
+      "epoch": 0.07668,
+      "grad_norm": 0.8923297593100042,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 7668
+    },
+    {
+      "epoch": 0.07669,
+      "grad_norm": 1.0249046055145887,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 7669
+    },
+    {
+      "epoch": 0.0767,
+      "grad_norm": 1.2053116597026634,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 7670
+    },
+    {
+      "epoch": 0.07671,
+      "grad_norm": 1.0082941639633163,
+      "learning_rate": 0.003,
+      "loss": 4.0691,
+      "step": 7671
+    },
+    {
+      "epoch": 0.07672,
+      "grad_norm": 1.2734345360500605,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 7672
+    },
+    {
+      "epoch": 0.07673,
+      "grad_norm": 0.9281784475022431,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 7673
+    },
+    {
+      "epoch": 0.07674,
+      "grad_norm": 1.0050293522340423,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 7674
+    },
+    {
+      "epoch": 0.07675,
+      "grad_norm": 1.1578682951290742,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 7675
+    },
+    {
+      "epoch": 0.07676,
+      "grad_norm": 1.0107218219879721,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 7676
+    },
+    {
+      "epoch": 0.07677,
+      "grad_norm": 1.122086603391903,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 7677
+    },
+    {
+      "epoch": 0.07678,
+      "grad_norm": 1.0817490316966227,
+      "learning_rate": 0.003,
+      "loss": 4.0773,
+      "step": 7678
+    },
+    {
+      "epoch": 0.07679,
+      "grad_norm": 1.0060234752712105,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 7679
+    },
+    {
+      "epoch": 0.0768,
+      "grad_norm": 1.0542727480500718,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 7680
+    },
+    {
+      "epoch": 0.07681,
+      "grad_norm": 0.9906102704846765,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 7681
+    },
+    {
+      "epoch": 0.07682,
+      "grad_norm": 1.1799698353197907,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 7682
+    },
+    {
+      "epoch": 0.07683,
+      "grad_norm": 0.9827379813572511,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 7683
+    },
+    {
+      "epoch": 0.07684,
+      "grad_norm": 1.0627107640985607,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 7684
+    },
+    {
+      "epoch": 0.07685,
+      "grad_norm": 0.8775795962977772,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 7685
+    },
+    {
+      "epoch": 0.07686,
+      "grad_norm": 0.7639644005133086,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 7686
+    },
+    {
+      "epoch": 0.07687,
+      "grad_norm": 0.8605351853455829,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 7687
+    },
+    {
+      "epoch": 0.07688,
+      "grad_norm": 0.9162241897881261,
+      "learning_rate": 0.003,
+      "loss": 4.0492,
+      "step": 7688
+    },
+    {
+      "epoch": 0.07689,
+      "grad_norm": 1.2441994585763008,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 7689
+    },
+    {
+      "epoch": 0.0769,
+      "grad_norm": 1.0783654477764117,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 7690
+    },
+    {
+      "epoch": 0.07691,
+      "grad_norm": 0.9813259733202012,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 7691
+    },
+    {
+      "epoch": 0.07692,
+      "grad_norm": 1.1132849435616365,
+      "learning_rate": 0.003,
+      "loss": 4.0676,
+      "step": 7692
+    },
+    {
+      "epoch": 0.07693,
+      "grad_norm": 0.968186114392162,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 7693
+    },
+    {
+      "epoch": 0.07694,
+      "grad_norm": 1.1180368633304685,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 7694
+    },
+    {
+      "epoch": 0.07695,
+      "grad_norm": 1.018150504213498,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 7695
+    },
+    {
+      "epoch": 0.07696,
+      "grad_norm": 1.1134933391530109,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 7696
+    },
+    {
+      "epoch": 0.07697,
+      "grad_norm": 1.112740567601611,
+      "learning_rate": 0.003,
+      "loss": 4.0846,
+      "step": 7697
+    },
+    {
+      "epoch": 0.07698,
+      "grad_norm": 1.035646453959349,
+      "learning_rate": 0.003,
+      "loss": 4.0586,
+      "step": 7698
+    },
+    {
+      "epoch": 0.07699,
+      "grad_norm": 1.1776754763679083,
+      "learning_rate": 0.003,
+      "loss": 4.089,
+      "step": 7699
+    },
+    {
+      "epoch": 0.077,
+      "grad_norm": 1.0846924700885932,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 7700
+    },
+    {
+      "epoch": 0.07701,
+      "grad_norm": 1.1355756172933456,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 7701
+    },
+    {
+      "epoch": 0.07702,
+      "grad_norm": 1.2445054567640432,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 7702
+    },
+    {
+      "epoch": 0.07703,
+      "grad_norm": 0.778195828814493,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 7703
+    },
+    {
+      "epoch": 0.07704,
+      "grad_norm": 0.7695651423057185,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 7704
+    },
+    {
+      "epoch": 0.07705,
+      "grad_norm": 0.8208723519118769,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 7705
+    },
+    {
+      "epoch": 0.07706,
+      "grad_norm": 1.0591846703576349,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 7706
+    },
+    {
+      "epoch": 0.07707,
+      "grad_norm": 1.4331501433140448,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 7707
+    },
+    {
+      "epoch": 0.07708,
+      "grad_norm": 0.8586416225127098,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 7708
+    },
+    {
+      "epoch": 0.07709,
+      "grad_norm": 0.9896715342897399,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 7709
+    },
+    {
+      "epoch": 0.0771,
+      "grad_norm": 1.0247102942561306,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 7710
+    },
+    {
+      "epoch": 0.07711,
+      "grad_norm": 0.9573824195950065,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 7711
+    },
+    {
+      "epoch": 0.07712,
+      "grad_norm": 0.9223979651594529,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 7712
+    },
+    {
+      "epoch": 0.07713,
+      "grad_norm": 1.123004149058808,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 7713
+    },
+    {
+      "epoch": 0.07714,
+      "grad_norm": 1.1362346630117264,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 7714
+    },
+    {
+      "epoch": 0.07715,
+      "grad_norm": 0.9873091975110139,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 7715
+    },
+    {
+      "epoch": 0.07716,
+      "grad_norm": 1.1616502010150347,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 7716
+    },
+    {
+      "epoch": 0.07717,
+      "grad_norm": 1.1465651911213932,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 7717
+    },
+    {
+      "epoch": 0.07718,
+      "grad_norm": 1.07816656816876,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 7718
+    },
+    {
+      "epoch": 0.07719,
+      "grad_norm": 0.879915285523726,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 7719
+    },
+    {
+      "epoch": 0.0772,
+      "grad_norm": 0.8694587190603413,
+      "learning_rate": 0.003,
+      "loss": 4.0681,
+      "step": 7720
+    },
+    {
+      "epoch": 0.07721,
+      "grad_norm": 0.9644244720161652,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 7721
+    },
+    {
+      "epoch": 0.07722,
+      "grad_norm": 1.0848954562449826,
+      "learning_rate": 0.003,
+      "loss": 4.0671,
+      "step": 7722
+    },
+    {
+      "epoch": 0.07723,
+      "grad_norm": 1.2394893053064837,
+      "learning_rate": 0.003,
+      "loss": 4.0754,
+      "step": 7723
+    },
+    {
+      "epoch": 0.07724,
+      "grad_norm": 0.9558592147764633,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 7724
+    },
+    {
+      "epoch": 0.07725,
+      "grad_norm": 1.1959360495832247,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 7725
+    },
+    {
+      "epoch": 0.07726,
+      "grad_norm": 0.9517967220511826,
+      "learning_rate": 0.003,
+      "loss": 4.0652,
+      "step": 7726
+    },
+    {
+      "epoch": 0.07727,
+      "grad_norm": 1.1579653372604413,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 7727
+    },
+    {
+      "epoch": 0.07728,
+      "grad_norm": 1.1068848248552594,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 7728
+    },
+    {
+      "epoch": 0.07729,
+      "grad_norm": 0.9527663111508775,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 7729
+    },
+    {
+      "epoch": 0.0773,
+      "grad_norm": 0.82211244826477,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 7730
+    },
+    {
+      "epoch": 0.07731,
+      "grad_norm": 0.8528406417127462,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 7731
+    },
+    {
+      "epoch": 0.07732,
+      "grad_norm": 0.9703295806343482,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 7732
+    },
+    {
+      "epoch": 0.07733,
+      "grad_norm": 1.233288598154536,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 7733
+    },
+    {
+      "epoch": 0.07734,
+      "grad_norm": 1.1910360013875985,
+      "learning_rate": 0.003,
+      "loss": 4.0635,
+      "step": 7734
+    },
+    {
+      "epoch": 0.07735,
+      "grad_norm": 1.1528280039800138,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 7735
+    },
+    {
+      "epoch": 0.07736,
+      "grad_norm": 1.163933383869731,
+      "learning_rate": 0.003,
+      "loss": 4.0635,
+      "step": 7736
+    },
+    {
+      "epoch": 0.07737,
+      "grad_norm": 0.9130491112837049,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 7737
+    },
+    {
+      "epoch": 0.07738,
+      "grad_norm": 0.9070020333481247,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 7738
+    },
+    {
+      "epoch": 0.07739,
+      "grad_norm": 0.9956178440672993,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 7739
+    },
+    {
+      "epoch": 0.0774,
+      "grad_norm": 1.2016421176014782,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 7740
+    },
+    {
+      "epoch": 0.07741,
+      "grad_norm": 1.0114172630176164,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 7741
+    },
+    {
+      "epoch": 0.07742,
+      "grad_norm": 1.0767057084476068,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 7742
+    },
+    {
+      "epoch": 0.07743,
+      "grad_norm": 1.140758765621141,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 7743
+    },
+    {
+      "epoch": 0.07744,
+      "grad_norm": 0.941795514834158,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 7744
+    },
+    {
+      "epoch": 0.07745,
+      "grad_norm": 0.947968983480798,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 7745
+    },
+    {
+      "epoch": 0.07746,
+      "grad_norm": 1.1438558351135404,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 7746
+    },
+    {
+      "epoch": 0.07747,
+      "grad_norm": 1.1271433369048343,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 7747
+    },
+    {
+      "epoch": 0.07748,
+      "grad_norm": 1.068011422911948,
+      "learning_rate": 0.003,
+      "loss": 4.0779,
+      "step": 7748
+    },
+    {
+      "epoch": 0.07749,
+      "grad_norm": 1.1673301793274091,
+      "learning_rate": 0.003,
+      "loss": 4.077,
+      "step": 7749
+    },
+    {
+      "epoch": 0.0775,
+      "grad_norm": 0.9871201697529424,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 7750
+    },
+    {
+      "epoch": 0.07751,
+      "grad_norm": 1.1865630409028847,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 7751
+    },
+    {
+      "epoch": 0.07752,
+      "grad_norm": 0.9135508252960873,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 7752
+    },
+    {
+      "epoch": 0.07753,
+      "grad_norm": 1.0491466617434224,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 7753
+    },
+    {
+      "epoch": 0.07754,
+      "grad_norm": 1.0597062148757896,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 7754
+    },
+    {
+      "epoch": 0.07755,
+      "grad_norm": 1.0522100404904275,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 7755
+    },
+    {
+      "epoch": 0.07756,
+      "grad_norm": 1.1298394191949892,
+      "learning_rate": 0.003,
+      "loss": 4.0644,
+      "step": 7756
+    },
+    {
+      "epoch": 0.07757,
+      "grad_norm": 0.9473952380257887,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 7757
+    },
+    {
+      "epoch": 0.07758,
+      "grad_norm": 1.0741481447103787,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 7758
+    },
+    {
+      "epoch": 0.07759,
+      "grad_norm": 1.1033185053370784,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 7759
+    },
+    {
+      "epoch": 0.0776,
+      "grad_norm": 0.946764770560814,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 7760
+    },
+    {
+      "epoch": 0.07761,
+      "grad_norm": 1.1024391112725458,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 7761
+    },
+    {
+      "epoch": 0.07762,
+      "grad_norm": 1.0452461471198484,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 7762
+    },
+    {
+      "epoch": 0.07763,
+      "grad_norm": 1.1390912229610954,
+      "learning_rate": 0.003,
+      "loss": 4.0643,
+      "step": 7763
+    },
+    {
+      "epoch": 0.07764,
+      "grad_norm": 1.0090027084277429,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 7764
+    },
+    {
+      "epoch": 0.07765,
+      "grad_norm": 1.1375674959522515,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 7765
+    },
+    {
+      "epoch": 0.07766,
+      "grad_norm": 1.2062883449499884,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 7766
+    },
+    {
+      "epoch": 0.07767,
+      "grad_norm": 1.1804562993590413,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 7767
+    },
+    {
+      "epoch": 0.07768,
+      "grad_norm": 1.0684635284018162,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 7768
+    },
+    {
+      "epoch": 0.07769,
+      "grad_norm": 1.1159137124746297,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 7769
+    },
+    {
+      "epoch": 0.0777,
+      "grad_norm": 1.0678717136438307,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 7770
+    },
+    {
+      "epoch": 0.07771,
+      "grad_norm": 0.9559701342030749,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 7771
+    },
+    {
+      "epoch": 0.07772,
+      "grad_norm": 1.0062287758341482,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 7772
+    },
+    {
+      "epoch": 0.07773,
+      "grad_norm": 1.0966855486122142,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 7773
+    },
+    {
+      "epoch": 0.07774,
+      "grad_norm": 0.9558412183434721,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 7774
+    },
+    {
+      "epoch": 0.07775,
+      "grad_norm": 1.2137613391546822,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 7775
+    },
+    {
+      "epoch": 0.07776,
+      "grad_norm": 0.7820218930391715,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 7776
+    },
+    {
+      "epoch": 0.07777,
+      "grad_norm": 0.7196952870861936,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 7777
+    },
+    {
+      "epoch": 0.07778,
+      "grad_norm": 0.9999098617704734,
+      "learning_rate": 0.003,
+      "loss": 4.0668,
+      "step": 7778
+    },
+    {
+      "epoch": 0.07779,
+      "grad_norm": 1.4627741712221116,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 7779
+    },
+    {
+      "epoch": 0.0778,
+      "grad_norm": 0.9529840352238272,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 7780
+    },
+    {
+      "epoch": 0.07781,
+      "grad_norm": 1.2285518753842994,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 7781
+    },
+    {
+      "epoch": 0.07782,
+      "grad_norm": 1.1404528733856336,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 7782
+    },
+    {
+      "epoch": 0.07783,
+      "grad_norm": 1.1975303734486777,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 7783
+    },
+    {
+      "epoch": 0.07784,
+      "grad_norm": 1.0934483916125426,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 7784
+    },
+    {
+      "epoch": 0.07785,
+      "grad_norm": 1.108972454763063,
+      "learning_rate": 0.003,
+      "loss": 4.0734,
+      "step": 7785
+    },
+    {
+      "epoch": 0.07786,
+      "grad_norm": 0.9879792571747996,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 7786
+    },
+    {
+      "epoch": 0.07787,
+      "grad_norm": 1.2023448684879607,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 7787
+    },
+    {
+      "epoch": 0.07788,
+      "grad_norm": 0.9538752828642438,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 7788
+    },
+    {
+      "epoch": 0.07789,
+      "grad_norm": 1.0308214934344375,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 7789
+    },
+    {
+      "epoch": 0.0779,
+      "grad_norm": 1.0650283574186172,
+      "learning_rate": 0.003,
+      "loss": 4.0806,
+      "step": 7790
+    },
+    {
+      "epoch": 0.07791,
+      "grad_norm": 0.9941845094809683,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 7791
+    },
+    {
+      "epoch": 0.07792,
+      "grad_norm": 1.0225769158424693,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 7792
+    },
+    {
+      "epoch": 0.07793,
+      "grad_norm": 0.9877404519817934,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 7793
+    },
+    {
+      "epoch": 0.07794,
+      "grad_norm": 1.1151886303616545,
+      "learning_rate": 0.003,
+      "loss": 4.0809,
+      "step": 7794
+    },
+    {
+      "epoch": 0.07795,
+      "grad_norm": 0.8354209579395252,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 7795
+    },
+    {
+      "epoch": 0.07796,
+      "grad_norm": 0.8544590286302752,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 7796
+    },
+    {
+      "epoch": 0.07797,
+      "grad_norm": 0.9428013064341034,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 7797
+    },
+    {
+      "epoch": 0.07798,
+      "grad_norm": 1.1393969133912794,
+      "learning_rate": 0.003,
+      "loss": 4.0745,
+      "step": 7798
+    },
+    {
+      "epoch": 0.07799,
+      "grad_norm": 1.1641475465913624,
+      "learning_rate": 0.003,
+      "loss": 4.0806,
+      "step": 7799
+    },
+    {
+      "epoch": 0.078,
+      "grad_norm": 1.0641337854621304,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 7800
+    },
+    {
+      "epoch": 0.07801,
+      "grad_norm": 1.3641996826697704,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 7801
+    },
+    {
+      "epoch": 0.07802,
+      "grad_norm": 1.1452620077902496,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 7802
+    },
+    {
+      "epoch": 0.07803,
+      "grad_norm": 1.1170775188247835,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 7803
+    },
+    {
+      "epoch": 0.07804,
+      "grad_norm": 1.296395122652786,
+      "learning_rate": 0.003,
+      "loss": 4.0696,
+      "step": 7804
+    },
+    {
+      "epoch": 0.07805,
+      "grad_norm": 0.9159841573401555,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 7805
+    },
+    {
+      "epoch": 0.07806,
+      "grad_norm": 1.0076146294344561,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 7806
+    },
+    {
+      "epoch": 0.07807,
+      "grad_norm": 1.0288357992024348,
+      "learning_rate": 0.003,
+      "loss": 4.068,
+      "step": 7807
+    },
+    {
+      "epoch": 0.07808,
+      "grad_norm": 0.9962519995561984,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 7808
+    },
+    {
+      "epoch": 0.07809,
+      "grad_norm": 0.9318878928153395,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 7809
+    },
+    {
+      "epoch": 0.0781,
+      "grad_norm": 0.9578002546946285,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 7810
+    },
+    {
+      "epoch": 0.07811,
+      "grad_norm": 1.2362712301307206,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 7811
+    },
+    {
+      "epoch": 0.07812,
+      "grad_norm": 1.0902092466065818,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 7812
+    },
+    {
+      "epoch": 0.07813,
+      "grad_norm": 1.0656770200110066,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 7813
+    },
+    {
+      "epoch": 0.07814,
+      "grad_norm": 0.9616877242084323,
+      "learning_rate": 0.003,
+      "loss": 4.0843,
+      "step": 7814
+    },
+    {
+      "epoch": 0.07815,
+      "grad_norm": 1.0827894290436153,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 7815
+    },
+    {
+      "epoch": 0.07816,
+      "grad_norm": 0.951863534026633,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 7816
+    },
+    {
+      "epoch": 0.07817,
+      "grad_norm": 1.0693420699978473,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 7817
+    },
+    {
+      "epoch": 0.07818,
+      "grad_norm": 1.1793875665990308,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 7818
+    },
+    {
+      "epoch": 0.07819,
+      "grad_norm": 0.9806237203551665,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 7819
+    },
+    {
+      "epoch": 0.0782,
+      "grad_norm": 1.074778244039972,
+      "learning_rate": 0.003,
+      "loss": 4.0707,
+      "step": 7820
+    },
+    {
+      "epoch": 0.07821,
+      "grad_norm": 0.9930768173537193,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 7821
+    },
+    {
+      "epoch": 0.07822,
+      "grad_norm": 1.0685158401451227,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 7822
+    },
+    {
+      "epoch": 0.07823,
+      "grad_norm": 1.0871181323072148,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 7823
+    },
+    {
+      "epoch": 0.07824,
+      "grad_norm": 1.21966298784846,
+      "learning_rate": 0.003,
+      "loss": 4.0758,
+      "step": 7824
+    },
+    {
+      "epoch": 0.07825,
+      "grad_norm": 1.0804803596808354,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 7825
+    },
+    {
+      "epoch": 0.07826,
+      "grad_norm": 1.019090987969821,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 7826
+    },
+    {
+      "epoch": 0.07827,
+      "grad_norm": 1.0988560306698911,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 7827
+    },
+    {
+      "epoch": 0.07828,
+      "grad_norm": 1.0517679422087889,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 7828
+    },
+    {
+      "epoch": 0.07829,
+      "grad_norm": 0.9621053073017727,
+      "learning_rate": 0.003,
+      "loss": 4.0619,
+      "step": 7829
+    },
+    {
+      "epoch": 0.0783,
+      "grad_norm": 0.9689820333439864,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 7830
+    },
+    {
+      "epoch": 0.07831,
+      "grad_norm": 1.023016739445038,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 7831
+    },
+    {
+      "epoch": 0.07832,
+      "grad_norm": 0.9984156227464857,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 7832
+    },
+    {
+      "epoch": 0.07833,
+      "grad_norm": 0.9919107518305011,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 7833
+    },
+    {
+      "epoch": 0.07834,
+      "grad_norm": 1.1543019969673187,
+      "learning_rate": 0.003,
+      "loss": 4.0981,
+      "step": 7834
+    },
+    {
+      "epoch": 0.07835,
+      "grad_norm": 1.0681590776729737,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 7835
+    },
+    {
+      "epoch": 0.07836,
+      "grad_norm": 1.0574570781003565,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 7836
+    },
+    {
+      "epoch": 0.07837,
+      "grad_norm": 1.2018757893559897,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 7837
+    },
+    {
+      "epoch": 0.07838,
+      "grad_norm": 0.9161356805849349,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 7838
+    },
+    {
+      "epoch": 0.07839,
+      "grad_norm": 0.9907463263104761,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 7839
+    },
+    {
+      "epoch": 0.0784,
+      "grad_norm": 1.1856358777888196,
+      "learning_rate": 0.003,
+      "loss": 4.0649,
+      "step": 7840
+    },
+    {
+      "epoch": 0.07841,
+      "grad_norm": 0.8938988806605744,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 7841
+    },
+    {
+      "epoch": 0.07842,
+      "grad_norm": 1.0190036904110218,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 7842
+    },
+    {
+      "epoch": 0.07843,
+      "grad_norm": 1.2414498860928165,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 7843
+    },
+    {
+      "epoch": 0.07844,
+      "grad_norm": 0.8645804084986568,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 7844
+    },
+    {
+      "epoch": 0.07845,
+      "grad_norm": 1.2555968702914302,
+      "learning_rate": 0.003,
+      "loss": 4.0658,
+      "step": 7845
+    },
+    {
+      "epoch": 0.07846,
+      "grad_norm": 1.1827535638877162,
+      "learning_rate": 0.003,
+      "loss": 4.0827,
+      "step": 7846
+    },
+    {
+      "epoch": 0.07847,
+      "grad_norm": 1.0585750613727776,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 7847
+    },
+    {
+      "epoch": 0.07848,
+      "grad_norm": 1.0251038257294671,
+      "learning_rate": 0.003,
+      "loss": 4.0683,
+      "step": 7848
+    },
+    {
+      "epoch": 0.07849,
+      "grad_norm": 1.0012756568810086,
+      "learning_rate": 0.003,
+      "loss": 4.0769,
+      "step": 7849
+    },
+    {
+      "epoch": 0.0785,
+      "grad_norm": 1.1087682350572658,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 7850
+    },
+    {
+      "epoch": 0.07851,
+      "grad_norm": 1.0570558235082463,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 7851
+    },
+    {
+      "epoch": 0.07852,
+      "grad_norm": 1.1870988713474258,
+      "learning_rate": 0.003,
+      "loss": 4.0778,
+      "step": 7852
+    },
+    {
+      "epoch": 0.07853,
+      "grad_norm": 1.1877824271372333,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 7853
+    },
+    {
+      "epoch": 0.07854,
+      "grad_norm": 0.9795576119811179,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 7854
+    },
+    {
+      "epoch": 0.07855,
+      "grad_norm": 1.0106904572896982,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 7855
+    },
+    {
+      "epoch": 0.07856,
+      "grad_norm": 1.074012949318807,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 7856
+    },
+    {
+      "epoch": 0.07857,
+      "grad_norm": 1.0682693133140797,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 7857
+    },
+    {
+      "epoch": 0.07858,
+      "grad_norm": 0.9622334233718971,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 7858
+    },
+    {
+      "epoch": 0.07859,
+      "grad_norm": 1.2435642589923817,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 7859
+    },
+    {
+      "epoch": 0.0786,
+      "grad_norm": 0.9713678691004027,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 7860
+    },
+    {
+      "epoch": 0.07861,
+      "grad_norm": 1.1992439198541343,
+      "learning_rate": 0.003,
+      "loss": 4.0699,
+      "step": 7861
+    },
+    {
+      "epoch": 0.07862,
+      "grad_norm": 0.9729097717840791,
+      "learning_rate": 0.003,
+      "loss": 4.0833,
+      "step": 7862
+    },
+    {
+      "epoch": 0.07863,
+      "grad_norm": 1.2123850094787298,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 7863
+    },
+    {
+      "epoch": 0.07864,
+      "grad_norm": 1.0554740247450138,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 7864
+    },
+    {
+      "epoch": 0.07865,
+      "grad_norm": 1.2691503318540769,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 7865
+    },
+    {
+      "epoch": 0.07866,
+      "grad_norm": 0.9510762936067487,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 7866
+    },
+    {
+      "epoch": 0.07867,
+      "grad_norm": 1.0063305379540366,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 7867
+    },
+    {
+      "epoch": 0.07868,
+      "grad_norm": 1.0938115833363737,
+      "learning_rate": 0.003,
+      "loss": 4.0689,
+      "step": 7868
+    },
+    {
+      "epoch": 0.07869,
+      "grad_norm": 1.0089952434242428,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 7869
+    },
+    {
+      "epoch": 0.0787,
+      "grad_norm": 1.0654347163848492,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 7870
+    },
+    {
+      "epoch": 0.07871,
+      "grad_norm": 1.1907030044141855,
+      "learning_rate": 0.003,
+      "loss": 4.0898,
+      "step": 7871
+    },
+    {
+      "epoch": 0.07872,
+      "grad_norm": 0.8730754246990861,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 7872
+    },
+    {
+      "epoch": 0.07873,
+      "grad_norm": 1.0021741546132197,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 7873
+    },
+    {
+      "epoch": 0.07874,
+      "grad_norm": 1.4383882439567937,
+      "learning_rate": 0.003,
+      "loss": 4.0564,
+      "step": 7874
+    },
+    {
+      "epoch": 0.07875,
+      "grad_norm": 0.8464322093253535,
+      "learning_rate": 0.003,
+      "loss": 4.0598,
+      "step": 7875
+    },
+    {
+      "epoch": 0.07876,
+      "grad_norm": 0.86475881480814,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 7876
+    },
+    {
+      "epoch": 0.07877,
+      "grad_norm": 1.109652921566004,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 7877
+    },
+    {
+      "epoch": 0.07878,
+      "grad_norm": 1.0879946460453025,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 7878
+    },
+    {
+      "epoch": 0.07879,
+      "grad_norm": 1.0666099280508157,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 7879
+    },
+    {
+      "epoch": 0.0788,
+      "grad_norm": 1.1532229023404108,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 7880
+    },
+    {
+      "epoch": 0.07881,
+      "grad_norm": 0.9157988575667765,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 7881
+    },
+    {
+      "epoch": 0.07882,
+      "grad_norm": 0.9989329398447048,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 7882
+    },
+    {
+      "epoch": 0.07883,
+      "grad_norm": 1.1179829054728383,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 7883
+    },
+    {
+      "epoch": 0.07884,
+      "grad_norm": 1.0047906870446375,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 7884
+    },
+    {
+      "epoch": 0.07885,
+      "grad_norm": 1.111240788046051,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 7885
+    },
+    {
+      "epoch": 0.07886,
+      "grad_norm": 1.0009988839259023,
+      "learning_rate": 0.003,
+      "loss": 4.0631,
+      "step": 7886
+    },
+    {
+      "epoch": 0.07887,
+      "grad_norm": 1.0893230346535585,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 7887
+    },
+    {
+      "epoch": 0.07888,
+      "grad_norm": 1.05660595072792,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 7888
+    },
+    {
+      "epoch": 0.07889,
+      "grad_norm": 1.159409811390816,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 7889
+    },
+    {
+      "epoch": 0.0789,
+      "grad_norm": 0.8841007147309605,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 7890
+    },
+    {
+      "epoch": 0.07891,
+      "grad_norm": 1.1474805660791436,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 7891
+    },
+    {
+      "epoch": 0.07892,
+      "grad_norm": 1.0700800869552938,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 7892
+    },
+    {
+      "epoch": 0.07893,
+      "grad_norm": 1.0506523629123479,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 7893
+    },
+    {
+      "epoch": 0.07894,
+      "grad_norm": 1.1196303889660475,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 7894
+    },
+    {
+      "epoch": 0.07895,
+      "grad_norm": 1.28735191775539,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 7895
+    },
+    {
+      "epoch": 0.07896,
+      "grad_norm": 0.9643317762261,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 7896
+    },
+    {
+      "epoch": 0.07897,
+      "grad_norm": 1.1070256196078418,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 7897
+    },
+    {
+      "epoch": 0.07898,
+      "grad_norm": 1.0355518469983214,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 7898
+    },
+    {
+      "epoch": 0.07899,
+      "grad_norm": 1.2282329250453026,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 7899
+    },
+    {
+      "epoch": 0.079,
+      "grad_norm": 1.0945607454529,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 7900
+    },
+    {
+      "epoch": 0.07901,
+      "grad_norm": 1.0648956548783817,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 7901
+    },
+    {
+      "epoch": 0.07902,
+      "grad_norm": 1.0992859940840842,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 7902
+    },
+    {
+      "epoch": 0.07903,
+      "grad_norm": 0.8834013507865798,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 7903
+    },
+    {
+      "epoch": 0.07904,
+      "grad_norm": 0.9999633090920114,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 7904
+    },
+    {
+      "epoch": 0.07905,
+      "grad_norm": 1.0963454219201827,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 7905
+    },
+    {
+      "epoch": 0.07906,
+      "grad_norm": 0.9167535868840054,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 7906
+    },
+    {
+      "epoch": 0.07907,
+      "grad_norm": 1.0671834336811867,
+      "learning_rate": 0.003,
+      "loss": 4.0514,
+      "step": 7907
+    },
+    {
+      "epoch": 0.07908,
+      "grad_norm": 0.9233899864843036,
+      "learning_rate": 0.003,
+      "loss": 4.0529,
+      "step": 7908
+    },
+    {
+      "epoch": 0.07909,
+      "grad_norm": 1.0062840807702038,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 7909
+    },
+    {
+      "epoch": 0.0791,
+      "grad_norm": 1.235984238772413,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 7910
+    },
+    {
+      "epoch": 0.07911,
+      "grad_norm": 0.9452902767282305,
+      "learning_rate": 0.003,
+      "loss": 4.06,
+      "step": 7911
+    },
+    {
+      "epoch": 0.07912,
+      "grad_norm": 1.170408612990077,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 7912
+    },
+    {
+      "epoch": 0.07913,
+      "grad_norm": 0.96235951994912,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 7913
+    },
+    {
+      "epoch": 0.07914,
+      "grad_norm": 1.0169853669192117,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 7914
+    },
+    {
+      "epoch": 0.07915,
+      "grad_norm": 1.149306172968735,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 7915
+    },
+    {
+      "epoch": 0.07916,
+      "grad_norm": 0.9684272704389931,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 7916
+    },
+    {
+      "epoch": 0.07917,
+      "grad_norm": 1.0099443419186045,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 7917
+    },
+    {
+      "epoch": 0.07918,
+      "grad_norm": 1.2290204445682136,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 7918
+    },
+    {
+      "epoch": 0.07919,
+      "grad_norm": 0.950110955160198,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 7919
+    },
+    {
+      "epoch": 0.0792,
+      "grad_norm": 1.1616333982044544,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 7920
+    },
+    {
+      "epoch": 0.07921,
+      "grad_norm": 1.2133949678821083,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 7921
+    },
+    {
+      "epoch": 0.07922,
+      "grad_norm": 0.8406932934598226,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 7922
+    },
+    {
+      "epoch": 0.07923,
+      "grad_norm": 1.000384156582474,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 7923
+    },
+    {
+      "epoch": 0.07924,
+      "grad_norm": 1.102645789500658,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 7924
+    },
+    {
+      "epoch": 0.07925,
+      "grad_norm": 1.0684901214506717,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 7925
+    },
+    {
+      "epoch": 0.07926,
+      "grad_norm": 1.293652658373931,
+      "learning_rate": 0.003,
+      "loss": 4.0717,
+      "step": 7926
+    },
+    {
+      "epoch": 0.07927,
+      "grad_norm": 0.9389310994335556,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 7927
+    },
+    {
+      "epoch": 0.07928,
+      "grad_norm": 1.081617761304198,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 7928
+    },
+    {
+      "epoch": 0.07929,
+      "grad_norm": 0.9572759773190526,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 7929
+    },
+    {
+      "epoch": 0.0793,
+      "grad_norm": 1.150932445412838,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 7930
+    },
+    {
+      "epoch": 0.07931,
+      "grad_norm": 1.0459375556090538,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 7931
+    },
+    {
+      "epoch": 0.07932,
+      "grad_norm": 1.2162128355571467,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 7932
+    },
+    {
+      "epoch": 0.07933,
+      "grad_norm": 0.8823256635368836,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 7933
+    },
+    {
+      "epoch": 0.07934,
+      "grad_norm": 1.0009325472605832,
+      "learning_rate": 0.003,
+      "loss": 4.069,
+      "step": 7934
+    },
+    {
+      "epoch": 0.07935,
+      "grad_norm": 1.0096305457720214,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 7935
+    },
+    {
+      "epoch": 0.07936,
+      "grad_norm": 1.268310154088615,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 7936
+    },
+    {
+      "epoch": 0.07937,
+      "grad_norm": 0.9021101857561977,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 7937
+    },
+    {
+      "epoch": 0.07938,
+      "grad_norm": 1.0291910822408756,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 7938
+    },
+    {
+      "epoch": 0.07939,
+      "grad_norm": 1.2336928484312266,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 7939
+    },
+    {
+      "epoch": 0.0794,
+      "grad_norm": 0.9200004067230584,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 7940
+    },
+    {
+      "epoch": 0.07941,
+      "grad_norm": 1.2029826125319552,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 7941
+    },
+    {
+      "epoch": 0.07942,
+      "grad_norm": 0.9905351217758338,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 7942
+    },
+    {
+      "epoch": 0.07943,
+      "grad_norm": 1.1672820977045213,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 7943
+    },
+    {
+      "epoch": 0.07944,
+      "grad_norm": 0.9552098668578122,
+      "learning_rate": 0.003,
+      "loss": 4.0746,
+      "step": 7944
+    },
+    {
+      "epoch": 0.07945,
+      "grad_norm": 1.1163448086544936,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 7945
+    },
+    {
+      "epoch": 0.07946,
+      "grad_norm": 1.1575061742112112,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 7946
+    },
+    {
+      "epoch": 0.07947,
+      "grad_norm": 0.9387057322081345,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 7947
+    },
+    {
+      "epoch": 0.07948,
+      "grad_norm": 1.036922515434725,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 7948
+    },
+    {
+      "epoch": 0.07949,
+      "grad_norm": 1.0571677074257047,
+      "learning_rate": 0.003,
+      "loss": 4.0631,
+      "step": 7949
+    },
+    {
+      "epoch": 0.0795,
+      "grad_norm": 0.9485396434466922,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 7950
+    },
+    {
+      "epoch": 0.07951,
+      "grad_norm": 0.9358661587180489,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 7951
+    },
+    {
+      "epoch": 0.07952,
+      "grad_norm": 1.072262899758602,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 7952
+    },
+    {
+      "epoch": 0.07953,
+      "grad_norm": 1.1384679393249275,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 7953
+    },
+    {
+      "epoch": 0.07954,
+      "grad_norm": 1.188750550794104,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 7954
+    },
+    {
+      "epoch": 0.07955,
+      "grad_norm": 1.088794668390209,
+      "learning_rate": 0.003,
+      "loss": 4.0644,
+      "step": 7955
+    },
+    {
+      "epoch": 0.07956,
+      "grad_norm": 0.944395467836122,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 7956
+    },
+    {
+      "epoch": 0.07957,
+      "grad_norm": 0.9876860612041447,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 7957
+    },
+    {
+      "epoch": 0.07958,
+      "grad_norm": 1.007844983649128,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 7958
+    },
+    {
+      "epoch": 0.07959,
+      "grad_norm": 1.0039606523415259,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 7959
+    },
+    {
+      "epoch": 0.0796,
+      "grad_norm": 1.1452784506766251,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 7960
+    },
+    {
+      "epoch": 0.07961,
+      "grad_norm": 1.1250238968213198,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 7961
+    },
+    {
+      "epoch": 0.07962,
+      "grad_norm": 0.8771519916323887,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 7962
+    },
+    {
+      "epoch": 0.07963,
+      "grad_norm": 1.039301187147565,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 7963
+    },
+    {
+      "epoch": 0.07964,
+      "grad_norm": 1.1539629521338333,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 7964
+    },
+    {
+      "epoch": 0.07965,
+      "grad_norm": 1.006514542734939,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 7965
+    },
+    {
+      "epoch": 0.07966,
+      "grad_norm": 1.2419647152063866,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 7966
+    },
+    {
+      "epoch": 0.07967,
+      "grad_norm": 1.0445292324352928,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 7967
+    },
+    {
+      "epoch": 0.07968,
+      "grad_norm": 1.1283302172541068,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 7968
+    },
+    {
+      "epoch": 0.07969,
+      "grad_norm": 0.9667428222328616,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 7969
+    },
+    {
+      "epoch": 0.0797,
+      "grad_norm": 0.9853555487701827,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 7970
+    },
+    {
+      "epoch": 0.07971,
+      "grad_norm": 0.9314086481126423,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 7971
+    },
+    {
+      "epoch": 0.07972,
+      "grad_norm": 1.041017548263576,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 7972
+    },
+    {
+      "epoch": 0.07973,
+      "grad_norm": 1.1632363339227705,
+      "learning_rate": 0.003,
+      "loss": 4.0602,
+      "step": 7973
+    },
+    {
+      "epoch": 0.07974,
+      "grad_norm": 0.9448198238046855,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 7974
+    },
+    {
+      "epoch": 0.07975,
+      "grad_norm": 1.2575451524297467,
+      "learning_rate": 0.003,
+      "loss": 4.0759,
+      "step": 7975
+    },
+    {
+      "epoch": 0.07976,
+      "grad_norm": 0.9254382565643743,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 7976
+    },
+    {
+      "epoch": 0.07977,
+      "grad_norm": 0.9554319748351229,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 7977
+    },
+    {
+      "epoch": 0.07978,
+      "grad_norm": 1.1583747304811194,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 7978
+    },
+    {
+      "epoch": 0.07979,
+      "grad_norm": 1.1000321798363142,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 7979
+    },
+    {
+      "epoch": 0.0798,
+      "grad_norm": 0.8911730010907487,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 7980
+    },
+    {
+      "epoch": 0.07981,
+      "grad_norm": 1.020237000586781,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 7981
+    },
+    {
+      "epoch": 0.07982,
+      "grad_norm": 1.3180466552295464,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 7982
+    },
+    {
+      "epoch": 0.07983,
+      "grad_norm": 1.071553392145814,
+      "learning_rate": 0.003,
+      "loss": 4.0767,
+      "step": 7983
+    },
+    {
+      "epoch": 0.07984,
+      "grad_norm": 1.2117191312115865,
+      "learning_rate": 0.003,
+      "loss": 4.0646,
+      "step": 7984
+    },
+    {
+      "epoch": 0.07985,
+      "grad_norm": 0.7909400534108864,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 7985
+    },
+    {
+      "epoch": 0.07986,
+      "grad_norm": 0.69774180892976,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 7986
+    },
+    {
+      "epoch": 0.07987,
+      "grad_norm": 0.7627415677016504,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 7987
+    },
+    {
+      "epoch": 0.07988,
+      "grad_norm": 0.7977914673045347,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 7988
+    },
+    {
+      "epoch": 0.07989,
+      "grad_norm": 0.9057464007553878,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 7989
+    },
+    {
+      "epoch": 0.0799,
+      "grad_norm": 0.9968132407767837,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 7990
+    },
+    {
+      "epoch": 0.07991,
+      "grad_norm": 1.0049624387001375,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 7991
+    },
+    {
+      "epoch": 0.07992,
+      "grad_norm": 1.2238112277907751,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 7992
+    },
+    {
+      "epoch": 0.07993,
+      "grad_norm": 0.9170174217210549,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 7993
+    },
+    {
+      "epoch": 0.07994,
+      "grad_norm": 0.9120312741869245,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 7994
+    },
+    {
+      "epoch": 0.07995,
+      "grad_norm": 1.0185635782956717,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 7995
+    },
+    {
+      "epoch": 0.07996,
+      "grad_norm": 1.1924384020647054,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 7996
+    },
+    {
+      "epoch": 0.07997,
+      "grad_norm": 1.1865042797171061,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 7997
+    },
+    {
+      "epoch": 0.07998,
+      "grad_norm": 0.9979487786588422,
+      "learning_rate": 0.003,
+      "loss": 4.0721,
+      "step": 7998
+    },
+    {
+      "epoch": 0.07999,
+      "grad_norm": 1.0145256510147855,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 7999
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.0061604187402993,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 8000
+    },
+    {
+      "epoch": 0.08001,
+      "grad_norm": 1.0877536512304802,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 8001
+    },
+    {
+      "epoch": 0.08002,
+      "grad_norm": 1.103363283471722,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 8002
+    },
+    {
+      "epoch": 0.08003,
+      "grad_norm": 1.1565936510093848,
+      "learning_rate": 0.003,
+      "loss": 4.0567,
+      "step": 8003
+    },
+    {
+      "epoch": 0.08004,
+      "grad_norm": 0.8339485185169685,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 8004
+    },
+    {
+      "epoch": 0.08005,
+      "grad_norm": 0.8957972908506947,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 8005
+    },
+    {
+      "epoch": 0.08006,
+      "grad_norm": 0.9755062166369889,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 8006
+    },
+    {
+      "epoch": 0.08007,
+      "grad_norm": 1.1356741968512036,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 8007
+    },
+    {
+      "epoch": 0.08008,
+      "grad_norm": 0.9798864056276021,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 8008
+    },
+    {
+      "epoch": 0.08009,
+      "grad_norm": 1.1435111910784777,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 8009
+    },
+    {
+      "epoch": 0.0801,
+      "grad_norm": 1.1205942333697474,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 8010
+    },
+    {
+      "epoch": 0.08011,
+      "grad_norm": 1.131476082802707,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 8011
+    },
+    {
+      "epoch": 0.08012,
+      "grad_norm": 1.0461557826019758,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 8012
+    },
+    {
+      "epoch": 0.08013,
+      "grad_norm": 1.0524770267958885,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 8013
+    },
+    {
+      "epoch": 0.08014,
+      "grad_norm": 1.2440328295727094,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 8014
+    },
+    {
+      "epoch": 0.08015,
+      "grad_norm": 1.114392646783546,
+      "learning_rate": 0.003,
+      "loss": 4.0656,
+      "step": 8015
+    },
+    {
+      "epoch": 0.08016,
+      "grad_norm": 1.0812684439503044,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 8016
+    },
+    {
+      "epoch": 0.08017,
+      "grad_norm": 1.1433960945173143,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 8017
+    },
+    {
+      "epoch": 0.08018,
+      "grad_norm": 0.9868904103661147,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 8018
+    },
+    {
+      "epoch": 0.08019,
+      "grad_norm": 1.084639898239188,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 8019
+    },
+    {
+      "epoch": 0.0802,
+      "grad_norm": 1.185981726128112,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 8020
+    },
+    {
+      "epoch": 0.08021,
+      "grad_norm": 1.0766762166520623,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 8021
+    },
+    {
+      "epoch": 0.08022,
+      "grad_norm": 0.9287360641086851,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 8022
+    },
+    {
+      "epoch": 0.08023,
+      "grad_norm": 0.9456722393800089,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 8023
+    },
+    {
+      "epoch": 0.08024,
+      "grad_norm": 1.0828009737424313,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 8024
+    },
+    {
+      "epoch": 0.08025,
+      "grad_norm": 1.2456772261250684,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 8025
+    },
+    {
+      "epoch": 0.08026,
+      "grad_norm": 1.0148217772847923,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 8026
+    },
+    {
+      "epoch": 0.08027,
+      "grad_norm": 0.9138608081544262,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 8027
+    },
+    {
+      "epoch": 0.08028,
+      "grad_norm": 0.9517142464911736,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 8028
+    },
+    {
+      "epoch": 0.08029,
+      "grad_norm": 1.1073434017859498,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 8029
+    },
+    {
+      "epoch": 0.0803,
+      "grad_norm": 0.9290905977228465,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 8030
+    },
+    {
+      "epoch": 0.08031,
+      "grad_norm": 0.9226578478764562,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 8031
+    },
+    {
+      "epoch": 0.08032,
+      "grad_norm": 1.0759256080505066,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 8032
+    },
+    {
+      "epoch": 0.08033,
+      "grad_norm": 0.9511400028043164,
+      "learning_rate": 0.003,
+      "loss": 4.0588,
+      "step": 8033
+    },
+    {
+      "epoch": 0.08034,
+      "grad_norm": 1.1078049780159342,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 8034
+    },
+    {
+      "epoch": 0.08035,
+      "grad_norm": 0.9535032742600911,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 8035
+    },
+    {
+      "epoch": 0.08036,
+      "grad_norm": 1.1614455582425816,
+      "learning_rate": 0.003,
+      "loss": 4.0849,
+      "step": 8036
+    },
+    {
+      "epoch": 0.08037,
+      "grad_norm": 0.8834094122204313,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 8037
+    },
+    {
+      "epoch": 0.08038,
+      "grad_norm": 1.1130873275597826,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 8038
+    },
+    {
+      "epoch": 0.08039,
+      "grad_norm": 1.211408515493295,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 8039
+    },
+    {
+      "epoch": 0.0804,
+      "grad_norm": 1.1685320433873827,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 8040
+    },
+    {
+      "epoch": 0.08041,
+      "grad_norm": 1.0277080460599624,
+      "learning_rate": 0.003,
+      "loss": 4.0633,
+      "step": 8041
+    },
+    {
+      "epoch": 0.08042,
+      "grad_norm": 1.1973269882264728,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 8042
+    },
+    {
+      "epoch": 0.08043,
+      "grad_norm": 0.8271614764410427,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 8043
+    },
+    {
+      "epoch": 0.08044,
+      "grad_norm": 0.8887802571055234,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 8044
+    },
+    {
+      "epoch": 0.08045,
+      "grad_norm": 1.2110808135009032,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 8045
+    },
+    {
+      "epoch": 0.08046,
+      "grad_norm": 0.9518356758920827,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 8046
+    },
+    {
+      "epoch": 0.08047,
+      "grad_norm": 1.3038849837413196,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 8047
+    },
+    {
+      "epoch": 0.08048,
+      "grad_norm": 0.919567623717858,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 8048
+    },
+    {
+      "epoch": 0.08049,
+      "grad_norm": 0.8954045094329315,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 8049
+    },
+    {
+      "epoch": 0.0805,
+      "grad_norm": 1.0858667351123445,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 8050
+    },
+    {
+      "epoch": 0.08051,
+      "grad_norm": 1.0474964812508383,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 8051
+    },
+    {
+      "epoch": 0.08052,
+      "grad_norm": 0.998847972432671,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 8052
+    },
+    {
+      "epoch": 0.08053,
+      "grad_norm": 1.0859084509999066,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 8053
+    },
+    {
+      "epoch": 0.08054,
+      "grad_norm": 0.9520041929258798,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 8054
+    },
+    {
+      "epoch": 0.08055,
+      "grad_norm": 0.8849900316097494,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 8055
+    },
+    {
+      "epoch": 0.08056,
+      "grad_norm": 1.0200607183606778,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 8056
+    },
+    {
+      "epoch": 0.08057,
+      "grad_norm": 1.385440316855803,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 8057
+    },
+    {
+      "epoch": 0.08058,
+      "grad_norm": 0.7560965761499825,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 8058
+    },
+    {
+      "epoch": 0.08059,
+      "grad_norm": 0.802745797203238,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 8059
+    },
+    {
+      "epoch": 0.0806,
+      "grad_norm": 0.8780307041913072,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 8060
+    },
+    {
+      "epoch": 0.08061,
+      "grad_norm": 0.9388145447642858,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 8061
+    },
+    {
+      "epoch": 0.08062,
+      "grad_norm": 1.1392056930959564,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 8062
+    },
+    {
+      "epoch": 0.08063,
+      "grad_norm": 1.1500771661810032,
+      "learning_rate": 0.003,
+      "loss": 4.0786,
+      "step": 8063
+    },
+    {
+      "epoch": 0.08064,
+      "grad_norm": 1.2259785226656719,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 8064
+    },
+    {
+      "epoch": 0.08065,
+      "grad_norm": 0.9482855137356565,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 8065
+    },
+    {
+      "epoch": 0.08066,
+      "grad_norm": 1.1941185655993347,
+      "learning_rate": 0.003,
+      "loss": 4.069,
+      "step": 8066
+    },
+    {
+      "epoch": 0.08067,
+      "grad_norm": 1.0216574976533404,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 8067
+    },
+    {
+      "epoch": 0.08068,
+      "grad_norm": 1.1815373632833652,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 8068
+    },
+    {
+      "epoch": 0.08069,
+      "grad_norm": 0.8362728337965947,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 8069
+    },
+    {
+      "epoch": 0.0807,
+      "grad_norm": 1.0141924727587812,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 8070
+    },
+    {
+      "epoch": 0.08071,
+      "grad_norm": 1.2085013425881042,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 8071
+    },
+    {
+      "epoch": 0.08072,
+      "grad_norm": 1.1611590433461545,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 8072
+    },
+    {
+      "epoch": 0.08073,
+      "grad_norm": 1.1178371421514612,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 8073
+    },
+    {
+      "epoch": 0.08074,
+      "grad_norm": 0.8685364005438322,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 8074
+    },
+    {
+      "epoch": 0.08075,
+      "grad_norm": 0.9730008967866436,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 8075
+    },
+    {
+      "epoch": 0.08076,
+      "grad_norm": 1.2822538457110706,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 8076
+    },
+    {
+      "epoch": 0.08077,
+      "grad_norm": 1.0798964686596582,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 8077
+    },
+    {
+      "epoch": 0.08078,
+      "grad_norm": 1.2985664488989839,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 8078
+    },
+    {
+      "epoch": 0.08079,
+      "grad_norm": 0.9957269652282372,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 8079
+    },
+    {
+      "epoch": 0.0808,
+      "grad_norm": 0.8700008490746549,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 8080
+    },
+    {
+      "epoch": 0.08081,
+      "grad_norm": 0.9054583409173683,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 8081
+    },
+    {
+      "epoch": 0.08082,
+      "grad_norm": 0.9766818304459607,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 8082
+    },
+    {
+      "epoch": 0.08083,
+      "grad_norm": 1.0138171171604249,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 8083
+    },
+    {
+      "epoch": 0.08084,
+      "grad_norm": 1.1036070375778735,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 8084
+    },
+    {
+      "epoch": 0.08085,
+      "grad_norm": 1.1006361031316971,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 8085
+    },
+    {
+      "epoch": 0.08086,
+      "grad_norm": 1.2665221292524846,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 8086
+    },
+    {
+      "epoch": 0.08087,
+      "grad_norm": 0.87235683124875,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 8087
+    },
+    {
+      "epoch": 0.08088,
+      "grad_norm": 1.2101069976094259,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 8088
+    },
+    {
+      "epoch": 0.08089,
+      "grad_norm": 1.174020295847284,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 8089
+    },
+    {
+      "epoch": 0.0809,
+      "grad_norm": 1.0782301641574674,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 8090
+    },
+    {
+      "epoch": 0.08091,
+      "grad_norm": 1.3340904191723806,
+      "learning_rate": 0.003,
+      "loss": 4.0728,
+      "step": 8091
+    },
+    {
+      "epoch": 0.08092,
+      "grad_norm": 0.9678394845670897,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 8092
+    },
+    {
+      "epoch": 0.08093,
+      "grad_norm": 1.1614114147755075,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 8093
+    },
+    {
+      "epoch": 0.08094,
+      "grad_norm": 1.1087224012161654,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 8094
+    },
+    {
+      "epoch": 0.08095,
+      "grad_norm": 1.280636136936636,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 8095
+    },
+    {
+      "epoch": 0.08096,
+      "grad_norm": 0.9820162541292741,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 8096
+    },
+    {
+      "epoch": 0.08097,
+      "grad_norm": 1.1089399244430402,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 8097
+    },
+    {
+      "epoch": 0.08098,
+      "grad_norm": 0.9715722278401574,
+      "learning_rate": 0.003,
+      "loss": 4.0699,
+      "step": 8098
+    },
+    {
+      "epoch": 0.08099,
+      "grad_norm": 1.164836620545108,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 8099
+    },
+    {
+      "epoch": 0.081,
+      "grad_norm": 1.0306069756917127,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 8100
+    },
+    {
+      "epoch": 0.08101,
+      "grad_norm": 1.1971074710915723,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 8101
+    },
+    {
+      "epoch": 0.08102,
+      "grad_norm": 0.9529922879670738,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 8102
+    },
+    {
+      "epoch": 0.08103,
+      "grad_norm": 1.1839304856546549,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 8103
+    },
+    {
+      "epoch": 0.08104,
+      "grad_norm": 0.9885159799435862,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 8104
+    },
+    {
+      "epoch": 0.08105,
+      "grad_norm": 1.0385726215167874,
+      "learning_rate": 0.003,
+      "loss": 4.0611,
+      "step": 8105
+    },
+    {
+      "epoch": 0.08106,
+      "grad_norm": 0.9513496918238994,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 8106
+    },
+    {
+      "epoch": 0.08107,
+      "grad_norm": 1.1227985010536334,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 8107
+    },
+    {
+      "epoch": 0.08108,
+      "grad_norm": 1.0247223199647508,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 8108
+    },
+    {
+      "epoch": 0.08109,
+      "grad_norm": 1.0113529850755245,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 8109
+    },
+    {
+      "epoch": 0.0811,
+      "grad_norm": 1.1642917754838757,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 8110
+    },
+    {
+      "epoch": 0.08111,
+      "grad_norm": 0.885899156879861,
+      "learning_rate": 0.003,
+      "loss": 4.0492,
+      "step": 8111
+    },
+    {
+      "epoch": 0.08112,
+      "grad_norm": 1.0233932927167342,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 8112
+    },
+    {
+      "epoch": 0.08113,
+      "grad_norm": 1.1478499269861702,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 8113
+    },
+    {
+      "epoch": 0.08114,
+      "grad_norm": 1.191391018906478,
+      "learning_rate": 0.003,
+      "loss": 4.0685,
+      "step": 8114
+    },
+    {
+      "epoch": 0.08115,
+      "grad_norm": 1.0398287191821558,
+      "learning_rate": 0.003,
+      "loss": 4.0586,
+      "step": 8115
+    },
+    {
+      "epoch": 0.08116,
+      "grad_norm": 1.029396069350254,
+      "learning_rate": 0.003,
+      "loss": 4.0663,
+      "step": 8116
+    },
+    {
+      "epoch": 0.08117,
+      "grad_norm": 1.0620529797775633,
+      "learning_rate": 0.003,
+      "loss": 4.067,
+      "step": 8117
+    },
+    {
+      "epoch": 0.08118,
+      "grad_norm": 1.153228275579987,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 8118
+    },
+    {
+      "epoch": 0.08119,
+      "grad_norm": 1.0235694255861916,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 8119
+    },
+    {
+      "epoch": 0.0812,
+      "grad_norm": 1.2920287937035275,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 8120
+    },
+    {
+      "epoch": 0.08121,
+      "grad_norm": 0.798826528722648,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 8121
+    },
+    {
+      "epoch": 0.08122,
+      "grad_norm": 0.8251701793435979,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 8122
+    },
+    {
+      "epoch": 0.08123,
+      "grad_norm": 0.9500120105243509,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 8123
+    },
+    {
+      "epoch": 0.08124,
+      "grad_norm": 1.1358148106927366,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 8124
+    },
+    {
+      "epoch": 0.08125,
+      "grad_norm": 0.9701373726642997,
+      "learning_rate": 0.003,
+      "loss": 4.0721,
+      "step": 8125
+    },
+    {
+      "epoch": 0.08126,
+      "grad_norm": 1.303900333086603,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 8126
+    },
+    {
+      "epoch": 0.08127,
+      "grad_norm": 0.8348257183122614,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 8127
+    },
+    {
+      "epoch": 0.08128,
+      "grad_norm": 1.0168038658961494,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 8128
+    },
+    {
+      "epoch": 0.08129,
+      "grad_norm": 1.2570226327195435,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 8129
+    },
+    {
+      "epoch": 0.0813,
+      "grad_norm": 0.9835919552429621,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 8130
+    },
+    {
+      "epoch": 0.08131,
+      "grad_norm": 1.4786402848670535,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 8131
+    },
+    {
+      "epoch": 0.08132,
+      "grad_norm": 0.967482520755732,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 8132
+    },
+    {
+      "epoch": 0.08133,
+      "grad_norm": 1.0136679388024379,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 8133
+    },
+    {
+      "epoch": 0.08134,
+      "grad_norm": 1.3243507873969376,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 8134
+    },
+    {
+      "epoch": 0.08135,
+      "grad_norm": 1.1659701473075903,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 8135
+    },
+    {
+      "epoch": 0.08136,
+      "grad_norm": 1.1146756067652475,
+      "learning_rate": 0.003,
+      "loss": 4.0672,
+      "step": 8136
+    },
+    {
+      "epoch": 0.08137,
+      "grad_norm": 1.0102380938375048,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 8137
+    },
+    {
+      "epoch": 0.08138,
+      "grad_norm": 1.1104761872750726,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 8138
+    },
+    {
+      "epoch": 0.08139,
+      "grad_norm": 0.9806068045939812,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 8139
+    },
+    {
+      "epoch": 0.0814,
+      "grad_norm": 0.9578534939613226,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 8140
+    },
+    {
+      "epoch": 0.08141,
+      "grad_norm": 0.9783961516579012,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 8141
+    },
+    {
+      "epoch": 0.08142,
+      "grad_norm": 1.1102754179258492,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 8142
+    },
+    {
+      "epoch": 0.08143,
+      "grad_norm": 1.013761291015948,
+      "learning_rate": 0.003,
+      "loss": 4.0582,
+      "step": 8143
+    },
+    {
+      "epoch": 0.08144,
+      "grad_norm": 1.0984848305189987,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 8144
+    },
+    {
+      "epoch": 0.08145,
+      "grad_norm": 1.0741146027806776,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 8145
+    },
+    {
+      "epoch": 0.08146,
+      "grad_norm": 1.0850941853188774,
+      "learning_rate": 0.003,
+      "loss": 4.07,
+      "step": 8146
+    },
+    {
+      "epoch": 0.08147,
+      "grad_norm": 1.0900280533229438,
+      "learning_rate": 0.003,
+      "loss": 4.0696,
+      "step": 8147
+    },
+    {
+      "epoch": 0.08148,
+      "grad_norm": 1.1612246962032904,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 8148
+    },
+    {
+      "epoch": 0.08149,
+      "grad_norm": 1.3562111466807953,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 8149
+    },
+    {
+      "epoch": 0.0815,
+      "grad_norm": 0.9650515229317034,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 8150
+    },
+    {
+      "epoch": 0.08151,
+      "grad_norm": 1.1924509745774654,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 8151
+    },
+    {
+      "epoch": 0.08152,
+      "grad_norm": 1.094500106195963,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 8152
+    },
+    {
+      "epoch": 0.08153,
+      "grad_norm": 1.004251942188992,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 8153
+    },
+    {
+      "epoch": 0.08154,
+      "grad_norm": 1.1798560492422325,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 8154
+    },
+    {
+      "epoch": 0.08155,
+      "grad_norm": 1.0157829602982544,
+      "learning_rate": 0.003,
+      "loss": 4.0627,
+      "step": 8155
+    },
+    {
+      "epoch": 0.08156,
+      "grad_norm": 1.2891395825570224,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 8156
+    },
+    {
+      "epoch": 0.08157,
+      "grad_norm": 0.9232969874496607,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 8157
+    },
+    {
+      "epoch": 0.08158,
+      "grad_norm": 1.0373678761694196,
+      "learning_rate": 0.003,
+      "loss": 4.0732,
+      "step": 8158
+    },
+    {
+      "epoch": 0.08159,
+      "grad_norm": 1.3218980216269214,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 8159
+    },
+    {
+      "epoch": 0.0816,
+      "grad_norm": 0.9490086271587503,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 8160
+    },
+    {
+      "epoch": 0.08161,
+      "grad_norm": 1.0833199103673854,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 8161
+    },
+    {
+      "epoch": 0.08162,
+      "grad_norm": 1.2185544482985367,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 8162
+    },
+    {
+      "epoch": 0.08163,
+      "grad_norm": 0.9763505701602616,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 8163
+    },
+    {
+      "epoch": 0.08164,
+      "grad_norm": 1.066961091347535,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 8164
+    },
+    {
+      "epoch": 0.08165,
+      "grad_norm": 1.0509794131255215,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 8165
+    },
+    {
+      "epoch": 0.08166,
+      "grad_norm": 1.0757821468791668,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 8166
+    },
+    {
+      "epoch": 0.08167,
+      "grad_norm": 1.3397321654362855,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 8167
+    },
+    {
+      "epoch": 0.08168,
+      "grad_norm": 1.1362149152066932,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 8168
+    },
+    {
+      "epoch": 0.08169,
+      "grad_norm": 0.8763737391154698,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 8169
+    },
+    {
+      "epoch": 0.0817,
+      "grad_norm": 0.8325229180364018,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 8170
+    },
+    {
+      "epoch": 0.08171,
+      "grad_norm": 0.9119090572200499,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 8171
+    },
+    {
+      "epoch": 0.08172,
+      "grad_norm": 0.9166054245530775,
+      "learning_rate": 0.003,
+      "loss": 4.0538,
+      "step": 8172
+    },
+    {
+      "epoch": 0.08173,
+      "grad_norm": 1.0922618916234996,
+      "learning_rate": 0.003,
+      "loss": 4.0787,
+      "step": 8173
+    },
+    {
+      "epoch": 0.08174,
+      "grad_norm": 1.0948674878262272,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 8174
+    },
+    {
+      "epoch": 0.08175,
+      "grad_norm": 1.076487219894897,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 8175
+    },
+    {
+      "epoch": 0.08176,
+      "grad_norm": 1.0595377058120148,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 8176
+    },
+    {
+      "epoch": 0.08177,
+      "grad_norm": 1.1274198711334715,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 8177
+    },
+    {
+      "epoch": 0.08178,
+      "grad_norm": 1.0171853704517495,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 8178
+    },
+    {
+      "epoch": 0.08179,
+      "grad_norm": 1.3061262539701504,
+      "learning_rate": 0.003,
+      "loss": 4.0733,
+      "step": 8179
+    },
+    {
+      "epoch": 0.0818,
+      "grad_norm": 0.9862361455218118,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 8180
+    },
+    {
+      "epoch": 0.08181,
+      "grad_norm": 1.248761174376701,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 8181
+    },
+    {
+      "epoch": 0.08182,
+      "grad_norm": 1.04459958881984,
+      "learning_rate": 0.003,
+      "loss": 4.0772,
+      "step": 8182
+    },
+    {
+      "epoch": 0.08183,
+      "grad_norm": 1.034490751276438,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 8183
+    },
+    {
+      "epoch": 0.08184,
+      "grad_norm": 1.3084515904264853,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 8184
+    },
+    {
+      "epoch": 0.08185,
+      "grad_norm": 0.9554920895816984,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 8185
+    },
+    {
+      "epoch": 0.08186,
+      "grad_norm": 1.0543602272809869,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 8186
+    },
+    {
+      "epoch": 0.08187,
+      "grad_norm": 1.1260343697487754,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 8187
+    },
+    {
+      "epoch": 0.08188,
+      "grad_norm": 1.0245037440997349,
+      "learning_rate": 0.003,
+      "loss": 4.068,
+      "step": 8188
+    },
+    {
+      "epoch": 0.08189,
+      "grad_norm": 1.0290894153169319,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 8189
+    },
+    {
+      "epoch": 0.0819,
+      "grad_norm": 0.877806506800442,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 8190
+    },
+    {
+      "epoch": 0.08191,
+      "grad_norm": 0.8759876139628601,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 8191
+    },
+    {
+      "epoch": 0.08192,
+      "grad_norm": 1.0235597494522326,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 8192
+    },
+    {
+      "epoch": 0.08193,
+      "grad_norm": 1.0680156272560604,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 8193
+    },
+    {
+      "epoch": 0.08194,
+      "grad_norm": 0.9866872972933308,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 8194
+    },
+    {
+      "epoch": 0.08195,
+      "grad_norm": 1.0454954573691113,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 8195
+    },
+    {
+      "epoch": 0.08196,
+      "grad_norm": 0.878803096910163,
+      "learning_rate": 0.003,
+      "loss": 4.0606,
+      "step": 8196
+    },
+    {
+      "epoch": 0.08197,
+      "grad_norm": 1.097095395513429,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 8197
+    },
+    {
+      "epoch": 0.08198,
+      "grad_norm": 1.4128502904759463,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 8198
+    },
+    {
+      "epoch": 0.08199,
+      "grad_norm": 0.9349249325846235,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 8199
+    },
+    {
+      "epoch": 0.082,
+      "grad_norm": 1.1340924047935328,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 8200
+    },
+    {
+      "epoch": 0.08201,
+      "grad_norm": 1.04912739515356,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 8201
+    },
+    {
+      "epoch": 0.08202,
+      "grad_norm": 1.0714645593844108,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 8202
+    },
+    {
+      "epoch": 0.08203,
+      "grad_norm": 1.164096753838808,
+      "learning_rate": 0.003,
+      "loss": 4.0602,
+      "step": 8203
+    },
+    {
+      "epoch": 0.08204,
+      "grad_norm": 1.1316835410725452,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 8204
+    },
+    {
+      "epoch": 0.08205,
+      "grad_norm": 1.2120673109706728,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 8205
+    },
+    {
+      "epoch": 0.08206,
+      "grad_norm": 1.11571994095571,
+      "learning_rate": 0.003,
+      "loss": 4.0773,
+      "step": 8206
+    },
+    {
+      "epoch": 0.08207,
+      "grad_norm": 1.0621146376384545,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 8207
+    },
+    {
+      "epoch": 0.08208,
+      "grad_norm": 1.1747546800940614,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 8208
+    },
+    {
+      "epoch": 0.08209,
+      "grad_norm": 0.8502432613824764,
+      "learning_rate": 0.003,
+      "loss": 4.0572,
+      "step": 8209
+    },
+    {
+      "epoch": 0.0821,
+      "grad_norm": 0.9220719569509254,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 8210
+    },
+    {
+      "epoch": 0.08211,
+      "grad_norm": 1.2327782482092768,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 8211
+    },
+    {
+      "epoch": 0.08212,
+      "grad_norm": 1.019547573968891,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 8212
+    },
+    {
+      "epoch": 0.08213,
+      "grad_norm": 1.1264918883793147,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 8213
+    },
+    {
+      "epoch": 0.08214,
+      "grad_norm": 1.0153304383270647,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 8214
+    },
+    {
+      "epoch": 0.08215,
+      "grad_norm": 1.172513559366227,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 8215
+    },
+    {
+      "epoch": 0.08216,
+      "grad_norm": 1.1761345458122565,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 8216
+    },
+    {
+      "epoch": 0.08217,
+      "grad_norm": 1.0086245946196835,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 8217
+    },
+    {
+      "epoch": 0.08218,
+      "grad_norm": 1.0192220496141946,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 8218
+    },
+    {
+      "epoch": 0.08219,
+      "grad_norm": 1.1072279418839062,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 8219
+    },
+    {
+      "epoch": 0.0822,
+      "grad_norm": 1.2129949090775702,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 8220
+    },
+    {
+      "epoch": 0.08221,
+      "grad_norm": 1.335506660895218,
+      "learning_rate": 0.003,
+      "loss": 4.0702,
+      "step": 8221
+    },
+    {
+      "epoch": 0.08222,
+      "grad_norm": 0.9100821320950854,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 8222
+    },
+    {
+      "epoch": 0.08223,
+      "grad_norm": 1.2359285375629427,
+      "learning_rate": 0.003,
+      "loss": 4.0633,
+      "step": 8223
+    },
+    {
+      "epoch": 0.08224,
+      "grad_norm": 1.1420581603411728,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 8224
+    },
+    {
+      "epoch": 0.08225,
+      "grad_norm": 1.131941471315245,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 8225
+    },
+    {
+      "epoch": 0.08226,
+      "grad_norm": 0.9193337169990431,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 8226
+    },
+    {
+      "epoch": 0.08227,
+      "grad_norm": 0.9322903091927021,
+      "learning_rate": 0.003,
+      "loss": 4.0679,
+      "step": 8227
+    },
+    {
+      "epoch": 0.08228,
+      "grad_norm": 1.015271996777266,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 8228
+    },
+    {
+      "epoch": 0.08229,
+      "grad_norm": 1.134812005676848,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 8229
+    },
+    {
+      "epoch": 0.0823,
+      "grad_norm": 0.9067833302627255,
+      "learning_rate": 0.003,
+      "loss": 4.0621,
+      "step": 8230
+    },
+    {
+      "epoch": 0.08231,
+      "grad_norm": 1.1844642289666762,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 8231
+    },
+    {
+      "epoch": 0.08232,
+      "grad_norm": 1.074667737158904,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 8232
+    },
+    {
+      "epoch": 0.08233,
+      "grad_norm": 0.9455096038944548,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 8233
+    },
+    {
+      "epoch": 0.08234,
+      "grad_norm": 0.9451177013419999,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 8234
+    },
+    {
+      "epoch": 0.08235,
+      "grad_norm": 0.9751741426641366,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 8235
+    },
+    {
+      "epoch": 0.08236,
+      "grad_norm": 1.0139644613308296,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 8236
+    },
+    {
+      "epoch": 0.08237,
+      "grad_norm": 1.1102447919282574,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 8237
+    },
+    {
+      "epoch": 0.08238,
+      "grad_norm": 1.090996849580925,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 8238
+    },
+    {
+      "epoch": 0.08239,
+      "grad_norm": 1.1072648040046784,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 8239
+    },
+    {
+      "epoch": 0.0824,
+      "grad_norm": 0.963339170365707,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 8240
+    },
+    {
+      "epoch": 0.08241,
+      "grad_norm": 1.1416193830206067,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 8241
+    },
+    {
+      "epoch": 0.08242,
+      "grad_norm": 1.1161581273297645,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 8242
+    },
+    {
+      "epoch": 0.08243,
+      "grad_norm": 0.9836364537147602,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 8243
+    },
+    {
+      "epoch": 0.08244,
+      "grad_norm": 1.0051947565508417,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 8244
+    },
+    {
+      "epoch": 0.08245,
+      "grad_norm": 1.179881665824301,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 8245
+    },
+    {
+      "epoch": 0.08246,
+      "grad_norm": 1.0994155612110759,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 8246
+    },
+    {
+      "epoch": 0.08247,
+      "grad_norm": 1.1514528107070527,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 8247
+    },
+    {
+      "epoch": 0.08248,
+      "grad_norm": 1.1175713935986686,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 8248
+    },
+    {
+      "epoch": 0.08249,
+      "grad_norm": 1.022832537141348,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 8249
+    },
+    {
+      "epoch": 0.0825,
+      "grad_norm": 1.2208883258896968,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 8250
+    },
+    {
+      "epoch": 0.08251,
+      "grad_norm": 0.9539194420518737,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 8251
+    },
+    {
+      "epoch": 0.08252,
+      "grad_norm": 1.0155458495353762,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 8252
+    },
+    {
+      "epoch": 0.08253,
+      "grad_norm": 1.241505078520073,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 8253
+    },
+    {
+      "epoch": 0.08254,
+      "grad_norm": 0.9520715771435223,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 8254
+    },
+    {
+      "epoch": 0.08255,
+      "grad_norm": 1.024935048858687,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 8255
+    },
+    {
+      "epoch": 0.08256,
+      "grad_norm": 1.233328543047205,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 8256
+    },
+    {
+      "epoch": 0.08257,
+      "grad_norm": 1.190460709783594,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 8257
+    },
+    {
+      "epoch": 0.08258,
+      "grad_norm": 1.0367792756754817,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 8258
+    },
+    {
+      "epoch": 0.08259,
+      "grad_norm": 1.169563709355769,
+      "learning_rate": 0.003,
+      "loss": 4.0764,
+      "step": 8259
+    },
+    {
+      "epoch": 0.0826,
+      "grad_norm": 0.861527127229618,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 8260
+    },
+    {
+      "epoch": 0.08261,
+      "grad_norm": 0.8201507958780685,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 8261
+    },
+    {
+      "epoch": 0.08262,
+      "grad_norm": 0.9386291752296059,
+      "learning_rate": 0.003,
+      "loss": 4.0684,
+      "step": 8262
+    },
+    {
+      "epoch": 0.08263,
+      "grad_norm": 1.0230682657821621,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 8263
+    },
+    {
+      "epoch": 0.08264,
+      "grad_norm": 1.2158135102547172,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 8264
+    },
+    {
+      "epoch": 0.08265,
+      "grad_norm": 0.9858027331224795,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 8265
+    },
+    {
+      "epoch": 0.08266,
+      "grad_norm": 1.1514602100198823,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 8266
+    },
+    {
+      "epoch": 0.08267,
+      "grad_norm": 1.19867896784868,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 8267
+    },
+    {
+      "epoch": 0.08268,
+      "grad_norm": 0.8924446930144582,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 8268
+    },
+    {
+      "epoch": 0.08269,
+      "grad_norm": 0.9332387923779609,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 8269
+    },
+    {
+      "epoch": 0.0827,
+      "grad_norm": 1.0395207333244374,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 8270
+    },
+    {
+      "epoch": 0.08271,
+      "grad_norm": 1.1217717636669475,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 8271
+    },
+    {
+      "epoch": 0.08272,
+      "grad_norm": 1.158015224233601,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 8272
+    },
+    {
+      "epoch": 0.08273,
+      "grad_norm": 1.2991517374950396,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 8273
+    },
+    {
+      "epoch": 0.08274,
+      "grad_norm": 1.081398113932392,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 8274
+    },
+    {
+      "epoch": 0.08275,
+      "grad_norm": 1.1176536482392028,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 8275
+    },
+    {
+      "epoch": 0.08276,
+      "grad_norm": 0.9350722584845963,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 8276
+    },
+    {
+      "epoch": 0.08277,
+      "grad_norm": 0.9894919434780528,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 8277
+    },
+    {
+      "epoch": 0.08278,
+      "grad_norm": 1.1769258157634674,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 8278
+    },
+    {
+      "epoch": 0.08279,
+      "grad_norm": 0.9433131457871408,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 8279
+    },
+    {
+      "epoch": 0.0828,
+      "grad_norm": 0.9576999263892194,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 8280
+    },
+    {
+      "epoch": 0.08281,
+      "grad_norm": 1.209178534584828,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 8281
+    },
+    {
+      "epoch": 0.08282,
+      "grad_norm": 0.9614787804606547,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 8282
+    },
+    {
+      "epoch": 0.08283,
+      "grad_norm": 1.0971465514681484,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 8283
+    },
+    {
+      "epoch": 0.08284,
+      "grad_norm": 1.0107622943623467,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 8284
+    },
+    {
+      "epoch": 0.08285,
+      "grad_norm": 1.15514741446281,
+      "learning_rate": 0.003,
+      "loss": 4.0529,
+      "step": 8285
+    },
+    {
+      "epoch": 0.08286,
+      "grad_norm": 1.0676521040179816,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 8286
+    },
+    {
+      "epoch": 0.08287,
+      "grad_norm": 1.2393097291371007,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 8287
+    },
+    {
+      "epoch": 0.08288,
+      "grad_norm": 1.229284881723444,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 8288
+    },
+    {
+      "epoch": 0.08289,
+      "grad_norm": 0.8030678208057539,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 8289
+    },
+    {
+      "epoch": 0.0829,
+      "grad_norm": 0.9234071041203235,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 8290
+    },
+    {
+      "epoch": 0.08291,
+      "grad_norm": 1.2192535372086108,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 8291
+    },
+    {
+      "epoch": 0.08292,
+      "grad_norm": 1.0516037727758725,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 8292
+    },
+    {
+      "epoch": 0.08293,
+      "grad_norm": 0.964068579815504,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 8293
+    },
+    {
+      "epoch": 0.08294,
+      "grad_norm": 1.07685445953297,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 8294
+    },
+    {
+      "epoch": 0.08295,
+      "grad_norm": 1.2651219299645922,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 8295
+    },
+    {
+      "epoch": 0.08296,
+      "grad_norm": 0.9626687885048552,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 8296
+    },
+    {
+      "epoch": 0.08297,
+      "grad_norm": 0.9687755374419604,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 8297
+    },
+    {
+      "epoch": 0.08298,
+      "grad_norm": 1.3012272212633311,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 8298
+    },
+    {
+      "epoch": 0.08299,
+      "grad_norm": 0.9471340632405811,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 8299
+    },
+    {
+      "epoch": 0.083,
+      "grad_norm": 1.1889456485828054,
+      "learning_rate": 0.003,
+      "loss": 4.078,
+      "step": 8300
+    },
+    {
+      "epoch": 0.08301,
+      "grad_norm": 1.0926624866225692,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 8301
+    },
+    {
+      "epoch": 0.08302,
+      "grad_norm": 1.061747983105285,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 8302
+    },
+    {
+      "epoch": 0.08303,
+      "grad_norm": 1.2124144409859756,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 8303
+    },
+    {
+      "epoch": 0.08304,
+      "grad_norm": 1.0062093800648917,
+      "learning_rate": 0.003,
+      "loss": 4.0666,
+      "step": 8304
+    },
+    {
+      "epoch": 0.08305,
+      "grad_norm": 1.0797635537801393,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 8305
+    },
+    {
+      "epoch": 0.08306,
+      "grad_norm": 1.2165041896744566,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 8306
+    },
+    {
+      "epoch": 0.08307,
+      "grad_norm": 0.9371964802211156,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 8307
+    },
+    {
+      "epoch": 0.08308,
+      "grad_norm": 1.0824545701121477,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 8308
+    },
+    {
+      "epoch": 0.08309,
+      "grad_norm": 1.0702785761306515,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 8309
+    },
+    {
+      "epoch": 0.0831,
+      "grad_norm": 0.9850867353661309,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 8310
+    },
+    {
+      "epoch": 0.08311,
+      "grad_norm": 1.059401767428526,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 8311
+    },
+    {
+      "epoch": 0.08312,
+      "grad_norm": 1.1525254824845346,
+      "learning_rate": 0.003,
+      "loss": 4.0705,
+      "step": 8312
+    },
+    {
+      "epoch": 0.08313,
+      "grad_norm": 0.8651922401808925,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 8313
+    },
+    {
+      "epoch": 0.08314,
+      "grad_norm": 1.0602609432745576,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 8314
+    },
+    {
+      "epoch": 0.08315,
+      "grad_norm": 0.8182680455954303,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 8315
+    },
+    {
+      "epoch": 0.08316,
+      "grad_norm": 0.898921435803555,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 8316
+    },
+    {
+      "epoch": 0.08317,
+      "grad_norm": 1.262794585028179,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 8317
+    },
+    {
+      "epoch": 0.08318,
+      "grad_norm": 1.215745444075188,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 8318
+    },
+    {
+      "epoch": 0.08319,
+      "grad_norm": 1.019233573877823,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 8319
+    },
+    {
+      "epoch": 0.0832,
+      "grad_norm": 1.0398703718664923,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 8320
+    },
+    {
+      "epoch": 0.08321,
+      "grad_norm": 1.1361754781004614,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 8321
+    },
+    {
+      "epoch": 0.08322,
+      "grad_norm": 0.9093568729986535,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 8322
+    },
+    {
+      "epoch": 0.08323,
+      "grad_norm": 1.1369192324437696,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 8323
+    },
+    {
+      "epoch": 0.08324,
+      "grad_norm": 1.217916598721158,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 8324
+    },
+    {
+      "epoch": 0.08325,
+      "grad_norm": 1.0915850365976298,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 8325
+    },
+    {
+      "epoch": 0.08326,
+      "grad_norm": 1.1565123938632185,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 8326
+    },
+    {
+      "epoch": 0.08327,
+      "grad_norm": 0.8586661169517376,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 8327
+    },
+    {
+      "epoch": 0.08328,
+      "grad_norm": 0.8431082244999714,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 8328
+    },
+    {
+      "epoch": 0.08329,
+      "grad_norm": 0.831022312396331,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 8329
+    },
+    {
+      "epoch": 0.0833,
+      "grad_norm": 0.8854225855827627,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 8330
+    },
+    {
+      "epoch": 0.08331,
+      "grad_norm": 0.9489920933886857,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 8331
+    },
+    {
+      "epoch": 0.08332,
+      "grad_norm": 1.0014721691993675,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 8332
+    },
+    {
+      "epoch": 0.08333,
+      "grad_norm": 1.1572373116887549,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 8333
+    },
+    {
+      "epoch": 0.08334,
+      "grad_norm": 1.0696738130518826,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 8334
+    },
+    {
+      "epoch": 0.08335,
+      "grad_norm": 1.134011488039698,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 8335
+    },
+    {
+      "epoch": 0.08336,
+      "grad_norm": 1.171624442165235,
+      "learning_rate": 0.003,
+      "loss": 4.0712,
+      "step": 8336
+    },
+    {
+      "epoch": 0.08337,
+      "grad_norm": 0.9808780299153776,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 8337
+    },
+    {
+      "epoch": 0.08338,
+      "grad_norm": 1.1552726886292524,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 8338
+    },
+    {
+      "epoch": 0.08339,
+      "grad_norm": 1.119935444567191,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 8339
+    },
+    {
+      "epoch": 0.0834,
+      "grad_norm": 1.1254896342797662,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 8340
+    },
+    {
+      "epoch": 0.08341,
+      "grad_norm": 1.1341861913032731,
+      "learning_rate": 0.003,
+      "loss": 4.0683,
+      "step": 8341
+    },
+    {
+      "epoch": 0.08342,
+      "grad_norm": 1.025162475919914,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 8342
+    },
+    {
+      "epoch": 0.08343,
+      "grad_norm": 1.205937843754898,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 8343
+    },
+    {
+      "epoch": 0.08344,
+      "grad_norm": 0.9902520923365202,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 8344
+    },
+    {
+      "epoch": 0.08345,
+      "grad_norm": 1.288987691741149,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 8345
+    },
+    {
+      "epoch": 0.08346,
+      "grad_norm": 0.9551763232371178,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 8346
+    },
+    {
+      "epoch": 0.08347,
+      "grad_norm": 0.9880135377663202,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 8347
+    },
+    {
+      "epoch": 0.08348,
+      "grad_norm": 1.2888100524027102,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 8348
+    },
+    {
+      "epoch": 0.08349,
+      "grad_norm": 0.9525381368631001,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 8349
+    },
+    {
+      "epoch": 0.0835,
+      "grad_norm": 1.1902573539688603,
+      "learning_rate": 0.003,
+      "loss": 4.0782,
+      "step": 8350
+    },
+    {
+      "epoch": 0.08351,
+      "grad_norm": 1.0469716149508292,
+      "learning_rate": 0.003,
+      "loss": 4.0642,
+      "step": 8351
+    },
+    {
+      "epoch": 0.08352,
+      "grad_norm": 1.189680771320039,
+      "learning_rate": 0.003,
+      "loss": 4.0617,
+      "step": 8352
+    },
+    {
+      "epoch": 0.08353,
+      "grad_norm": 0.9911168111334406,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 8353
+    },
+    {
+      "epoch": 0.08354,
+      "grad_norm": 1.0276631861654357,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 8354
+    },
+    {
+      "epoch": 0.08355,
+      "grad_norm": 0.9849728479941415,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 8355
+    },
+    {
+      "epoch": 0.08356,
+      "grad_norm": 1.01503709707315,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 8356
+    },
+    {
+      "epoch": 0.08357,
+      "grad_norm": 1.2200149399048952,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 8357
+    },
+    {
+      "epoch": 0.08358,
+      "grad_norm": 0.9309969506083857,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 8358
+    },
+    {
+      "epoch": 0.08359,
+      "grad_norm": 1.1903852595304782,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 8359
+    },
+    {
+      "epoch": 0.0836,
+      "grad_norm": 1.1339893715722242,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 8360
+    },
+    {
+      "epoch": 0.08361,
+      "grad_norm": 1.0643831400781818,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 8361
+    },
+    {
+      "epoch": 0.08362,
+      "grad_norm": 1.249532330084765,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 8362
+    },
+    {
+      "epoch": 0.08363,
+      "grad_norm": 0.8661252025928181,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 8363
+    },
+    {
+      "epoch": 0.08364,
+      "grad_norm": 0.90648079035645,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 8364
+    },
+    {
+      "epoch": 0.08365,
+      "grad_norm": 0.9973227794538887,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 8365
+    },
+    {
+      "epoch": 0.08366,
+      "grad_norm": 1.185845321832341,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 8366
+    },
+    {
+      "epoch": 0.08367,
+      "grad_norm": 1.0527110344332224,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 8367
+    },
+    {
+      "epoch": 0.08368,
+      "grad_norm": 1.1213177889830799,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 8368
+    },
+    {
+      "epoch": 0.08369,
+      "grad_norm": 0.8532852276611194,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 8369
+    },
+    {
+      "epoch": 0.0837,
+      "grad_norm": 0.9223226425587234,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 8370
+    },
+    {
+      "epoch": 0.08371,
+      "grad_norm": 1.0781243641141955,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 8371
+    },
+    {
+      "epoch": 0.08372,
+      "grad_norm": 0.9339538120598732,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 8372
+    },
+    {
+      "epoch": 0.08373,
+      "grad_norm": 1.4452876155141794,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 8373
+    },
+    {
+      "epoch": 0.08374,
+      "grad_norm": 0.9063853871722677,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 8374
+    },
+    {
+      "epoch": 0.08375,
+      "grad_norm": 0.993278332875694,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 8375
+    },
+    {
+      "epoch": 0.08376,
+      "grad_norm": 1.1855350373423543,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 8376
+    },
+    {
+      "epoch": 0.08377,
+      "grad_norm": 1.1147746898162996,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 8377
+    },
+    {
+      "epoch": 0.08378,
+      "grad_norm": 1.138214657903677,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 8378
+    },
+    {
+      "epoch": 0.08379,
+      "grad_norm": 1.2479011609022754,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 8379
+    },
+    {
+      "epoch": 0.0838,
+      "grad_norm": 1.1416308067638565,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 8380
+    },
+    {
+      "epoch": 0.08381,
+      "grad_norm": 0.8970625864687897,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 8381
+    },
+    {
+      "epoch": 0.08382,
+      "grad_norm": 1.1310709176053892,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 8382
+    },
+    {
+      "epoch": 0.08383,
+      "grad_norm": 1.2920665241918188,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 8383
+    },
+    {
+      "epoch": 0.08384,
+      "grad_norm": 1.0350723875968093,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 8384
+    },
+    {
+      "epoch": 0.08385,
+      "grad_norm": 1.103275398901523,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 8385
+    },
+    {
+      "epoch": 0.08386,
+      "grad_norm": 0.9681045513350701,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 8386
+    },
+    {
+      "epoch": 0.08387,
+      "grad_norm": 1.355083711142688,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 8387
+    },
+    {
+      "epoch": 0.08388,
+      "grad_norm": 0.7934403977447374,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 8388
+    },
+    {
+      "epoch": 0.08389,
+      "grad_norm": 0.9516574206564951,
+      "learning_rate": 0.003,
+      "loss": 4.0549,
+      "step": 8389
+    },
+    {
+      "epoch": 0.0839,
+      "grad_norm": 1.1290072369116717,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 8390
+    },
+    {
+      "epoch": 0.08391,
+      "grad_norm": 1.0983041546433672,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 8391
+    },
+    {
+      "epoch": 0.08392,
+      "grad_norm": 1.1676787780611126,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 8392
+    },
+    {
+      "epoch": 0.08393,
+      "grad_norm": 0.941880845010308,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 8393
+    },
+    {
+      "epoch": 0.08394,
+      "grad_norm": 1.0752262942555577,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 8394
+    },
+    {
+      "epoch": 0.08395,
+      "grad_norm": 1.0717439798578736,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 8395
+    },
+    {
+      "epoch": 0.08396,
+      "grad_norm": 0.923502718166825,
+      "learning_rate": 0.003,
+      "loss": 4.0538,
+      "step": 8396
+    },
+    {
+      "epoch": 0.08397,
+      "grad_norm": 1.03568380249952,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 8397
+    },
+    {
+      "epoch": 0.08398,
+      "grad_norm": 1.2308296952539537,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 8398
+    },
+    {
+      "epoch": 0.08399,
+      "grad_norm": 0.9074167959796567,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 8399
+    },
+    {
+      "epoch": 0.084,
+      "grad_norm": 1.0676950120843756,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 8400
+    },
+    {
+      "epoch": 0.08401,
+      "grad_norm": 1.1876371217178374,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 8401
+    },
+    {
+      "epoch": 0.08402,
+      "grad_norm": 0.9744971214519869,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 8402
+    },
+    {
+      "epoch": 0.08403,
+      "grad_norm": 1.1750219156987762,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 8403
+    },
+    {
+      "epoch": 0.08404,
+      "grad_norm": 1.0407406867360138,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 8404
+    },
+    {
+      "epoch": 0.08405,
+      "grad_norm": 1.2236517472521589,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 8405
+    },
+    {
+      "epoch": 0.08406,
+      "grad_norm": 1.0737329492962817,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 8406
+    },
+    {
+      "epoch": 0.08407,
+      "grad_norm": 1.1563501503982763,
+      "learning_rate": 0.003,
+      "loss": 4.0929,
+      "step": 8407
+    },
+    {
+      "epoch": 0.08408,
+      "grad_norm": 1.151672035981857,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 8408
+    },
+    {
+      "epoch": 0.08409,
+      "grad_norm": 1.052519356480212,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 8409
+    },
+    {
+      "epoch": 0.0841,
+      "grad_norm": 1.3255584980401665,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 8410
+    },
+    {
+      "epoch": 0.08411,
+      "grad_norm": 0.7725053175360581,
+      "learning_rate": 0.003,
+      "loss": 4.0752,
+      "step": 8411
+    },
+    {
+      "epoch": 0.08412,
+      "grad_norm": 0.9932518870157506,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 8412
+    },
+    {
+      "epoch": 0.08413,
+      "grad_norm": 1.3097609444125302,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 8413
+    },
+    {
+      "epoch": 0.08414,
+      "grad_norm": 0.8806084620399991,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 8414
+    },
+    {
+      "epoch": 0.08415,
+      "grad_norm": 1.1049915504817205,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 8415
+    },
+    {
+      "epoch": 0.08416,
+      "grad_norm": 1.0825647357972656,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 8416
+    },
+    {
+      "epoch": 0.08417,
+      "grad_norm": 0.982855881055315,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 8417
+    },
+    {
+      "epoch": 0.08418,
+      "grad_norm": 0.9521266964049695,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 8418
+    },
+    {
+      "epoch": 0.08419,
+      "grad_norm": 0.9807370150605913,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 8419
+    },
+    {
+      "epoch": 0.0842,
+      "grad_norm": 1.1284553332406668,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 8420
+    },
+    {
+      "epoch": 0.08421,
+      "grad_norm": 1.0701358935667225,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 8421
+    },
+    {
+      "epoch": 0.08422,
+      "grad_norm": 1.110363256131976,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 8422
+    },
+    {
+      "epoch": 0.08423,
+      "grad_norm": 1.0301265941795035,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 8423
+    },
+    {
+      "epoch": 0.08424,
+      "grad_norm": 1.2241888249026445,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 8424
+    },
+    {
+      "epoch": 0.08425,
+      "grad_norm": 1.2132223786728582,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 8425
+    },
+    {
+      "epoch": 0.08426,
+      "grad_norm": 1.053345472153612,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 8426
+    },
+    {
+      "epoch": 0.08427,
+      "grad_norm": 1.062648403306003,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 8427
+    },
+    {
+      "epoch": 0.08428,
+      "grad_norm": 0.9969338637013739,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 8428
+    },
+    {
+      "epoch": 0.08429,
+      "grad_norm": 1.1167478557553572,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 8429
+    },
+    {
+      "epoch": 0.0843,
+      "grad_norm": 0.9976920929387573,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 8430
+    },
+    {
+      "epoch": 0.08431,
+      "grad_norm": 1.068620820351791,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 8431
+    },
+    {
+      "epoch": 0.08432,
+      "grad_norm": 1.055723055328784,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 8432
+    },
+    {
+      "epoch": 0.08433,
+      "grad_norm": 1.135026949998236,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 8433
+    },
+    {
+      "epoch": 0.08434,
+      "grad_norm": 1.046614082716392,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 8434
+    },
+    {
+      "epoch": 0.08435,
+      "grad_norm": 1.2281345462765678,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 8435
+    },
+    {
+      "epoch": 0.08436,
+      "grad_norm": 1.0815704353084468,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 8436
+    },
+    {
+      "epoch": 0.08437,
+      "grad_norm": 1.0822120658950827,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 8437
+    },
+    {
+      "epoch": 0.08438,
+      "grad_norm": 1.0610781680082266,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 8438
+    },
+    {
+      "epoch": 0.08439,
+      "grad_norm": 1.1633949783616182,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 8439
+    },
+    {
+      "epoch": 0.0844,
+      "grad_norm": 1.2351950497416953,
+      "learning_rate": 0.003,
+      "loss": 4.0698,
+      "step": 8440
+    },
+    {
+      "epoch": 0.08441,
+      "grad_norm": 0.9160963850716966,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 8441
+    },
+    {
+      "epoch": 0.08442,
+      "grad_norm": 1.0403778744976664,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 8442
+    },
+    {
+      "epoch": 0.08443,
+      "grad_norm": 0.9690769171898936,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 8443
+    },
+    {
+      "epoch": 0.08444,
+      "grad_norm": 1.0046811419165038,
+      "learning_rate": 0.003,
+      "loss": 4.0723,
+      "step": 8444
+    },
+    {
+      "epoch": 0.08445,
+      "grad_norm": 1.1457379760825268,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 8445
+    },
+    {
+      "epoch": 0.08446,
+      "grad_norm": 1.0784653343143604,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 8446
+    },
+    {
+      "epoch": 0.08447,
+      "grad_norm": 0.9955575399367267,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 8447
+    },
+    {
+      "epoch": 0.08448,
+      "grad_norm": 1.0185612550385477,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 8448
+    },
+    {
+      "epoch": 0.08449,
+      "grad_norm": 1.1385992623128935,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 8449
+    },
+    {
+      "epoch": 0.0845,
+      "grad_norm": 1.1658036316438427,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 8450
+    },
+    {
+      "epoch": 0.08451,
+      "grad_norm": 1.0612990389711034,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 8451
+    },
+    {
+      "epoch": 0.08452,
+      "grad_norm": 1.1305785397594885,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 8452
+    },
+    {
+      "epoch": 0.08453,
+      "grad_norm": 0.8831441944452132,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 8453
+    },
+    {
+      "epoch": 0.08454,
+      "grad_norm": 1.1184930943973512,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 8454
+    },
+    {
+      "epoch": 0.08455,
+      "grad_norm": 1.3097730422669984,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 8455
+    },
+    {
+      "epoch": 0.08456,
+      "grad_norm": 0.9486933571257935,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 8456
+    },
+    {
+      "epoch": 0.08457,
+      "grad_norm": 1.0027555477605425,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 8457
+    },
+    {
+      "epoch": 0.08458,
+      "grad_norm": 1.1962971696576739,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 8458
+    },
+    {
+      "epoch": 0.08459,
+      "grad_norm": 0.9387597135483401,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 8459
+    },
+    {
+      "epoch": 0.0846,
+      "grad_norm": 1.0383172475711102,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 8460
+    },
+    {
+      "epoch": 0.08461,
+      "grad_norm": 1.030965491865683,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 8461
+    },
+    {
+      "epoch": 0.08462,
+      "grad_norm": 1.2171453720071779,
+      "learning_rate": 0.003,
+      "loss": 4.0645,
+      "step": 8462
+    },
+    {
+      "epoch": 0.08463,
+      "grad_norm": 1.0376733249187324,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 8463
+    },
+    {
+      "epoch": 0.08464,
+      "grad_norm": 1.0961626196613234,
+      "learning_rate": 0.003,
+      "loss": 4.0688,
+      "step": 8464
+    },
+    {
+      "epoch": 0.08465,
+      "grad_norm": 1.0350888302747112,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 8465
+    },
+    {
+      "epoch": 0.08466,
+      "grad_norm": 1.064724029669904,
+      "learning_rate": 0.003,
+      "loss": 4.0635,
+      "step": 8466
+    },
+    {
+      "epoch": 0.08467,
+      "grad_norm": 1.0710400208790887,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 8467
+    },
+    {
+      "epoch": 0.08468,
+      "grad_norm": 1.2789305198093153,
+      "learning_rate": 0.003,
+      "loss": 4.063,
+      "step": 8468
+    },
+    {
+      "epoch": 0.08469,
+      "grad_norm": 1.1452225551848991,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 8469
+    },
+    {
+      "epoch": 0.0847,
+      "grad_norm": 1.13409301786806,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 8470
+    },
+    {
+      "epoch": 0.08471,
+      "grad_norm": 1.1566161039870437,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 8471
+    },
+    {
+      "epoch": 0.08472,
+      "grad_norm": 1.089186577688795,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 8472
+    },
+    {
+      "epoch": 0.08473,
+      "grad_norm": 0.9900808592861112,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 8473
+    },
+    {
+      "epoch": 0.08474,
+      "grad_norm": 1.2998498233820543,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 8474
+    },
+    {
+      "epoch": 0.08475,
+      "grad_norm": 0.9225329005828342,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 8475
+    },
+    {
+      "epoch": 0.08476,
+      "grad_norm": 1.0564013920428952,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 8476
+    },
+    {
+      "epoch": 0.08477,
+      "grad_norm": 0.939864643930408,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 8477
+    },
+    {
+      "epoch": 0.08478,
+      "grad_norm": 0.9883124870718386,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 8478
+    },
+    {
+      "epoch": 0.08479,
+      "grad_norm": 0.9957851651749337,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 8479
+    },
+    {
+      "epoch": 0.0848,
+      "grad_norm": 1.178478915544431,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 8480
+    },
+    {
+      "epoch": 0.08481,
+      "grad_norm": 1.0741787634257696,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 8481
+    },
+    {
+      "epoch": 0.08482,
+      "grad_norm": 1.1194627479559824,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 8482
+    },
+    {
+      "epoch": 0.08483,
+      "grad_norm": 0.9960554568951283,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 8483
+    },
+    {
+      "epoch": 0.08484,
+      "grad_norm": 1.361511264699175,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 8484
+    },
+    {
+      "epoch": 0.08485,
+      "grad_norm": 1.1007501689045969,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 8485
+    },
+    {
+      "epoch": 0.08486,
+      "grad_norm": 1.174240597139128,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 8486
+    },
+    {
+      "epoch": 0.08487,
+      "grad_norm": 0.9639151938457295,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 8487
+    },
+    {
+      "epoch": 0.08488,
+      "grad_norm": 1.0352335647337247,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 8488
+    },
+    {
+      "epoch": 0.08489,
+      "grad_norm": 1.0493143503227644,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 8489
+    },
+    {
+      "epoch": 0.0849,
+      "grad_norm": 1.1208147184681883,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 8490
+    },
+    {
+      "epoch": 0.08491,
+      "grad_norm": 0.9315118881737822,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 8491
+    },
+    {
+      "epoch": 0.08492,
+      "grad_norm": 1.1354408904086009,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 8492
+    },
+    {
+      "epoch": 0.08493,
+      "grad_norm": 0.9756810751721894,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 8493
+    },
+    {
+      "epoch": 0.08494,
+      "grad_norm": 0.943874329273859,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 8494
+    },
+    {
+      "epoch": 0.08495,
+      "grad_norm": 0.9530790823279345,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 8495
+    },
+    {
+      "epoch": 0.08496,
+      "grad_norm": 1.0476242283034254,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 8496
+    },
+    {
+      "epoch": 0.08497,
+      "grad_norm": 1.2230946630382007,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 8497
+    },
+    {
+      "epoch": 0.08498,
+      "grad_norm": 1.1262934455755145,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 8498
+    },
+    {
+      "epoch": 0.08499,
+      "grad_norm": 1.1704572369620323,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 8499
+    },
+    {
+      "epoch": 0.085,
+      "grad_norm": 1.0437922946142013,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 8500
+    },
+    {
+      "epoch": 0.08501,
+      "grad_norm": 1.0686795257894022,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 8501
+    },
+    {
+      "epoch": 0.08502,
+      "grad_norm": 1.1527737710293182,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 8502
+    },
+    {
+      "epoch": 0.08503,
+      "grad_norm": 1.0146162316100584,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 8503
+    },
+    {
+      "epoch": 0.08504,
+      "grad_norm": 1.117321943006214,
+      "learning_rate": 0.003,
+      "loss": 4.0721,
+      "step": 8504
+    },
+    {
+      "epoch": 0.08505,
+      "grad_norm": 1.1211708270006748,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 8505
+    },
+    {
+      "epoch": 0.08506,
+      "grad_norm": 1.0564903712086136,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 8506
+    },
+    {
+      "epoch": 0.08507,
+      "grad_norm": 1.0674811926990067,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 8507
+    },
+    {
+      "epoch": 0.08508,
+      "grad_norm": 1.361628142349855,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 8508
+    },
+    {
+      "epoch": 0.08509,
+      "grad_norm": 0.9588268292198073,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 8509
+    },
+    {
+      "epoch": 0.0851,
+      "grad_norm": 1.4027990342622492,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 8510
+    },
+    {
+      "epoch": 0.08511,
+      "grad_norm": 1.0313983203700878,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 8511
+    },
+    {
+      "epoch": 0.08512,
+      "grad_norm": 1.014747429488357,
+      "learning_rate": 0.003,
+      "loss": 4.0756,
+      "step": 8512
+    },
+    {
+      "epoch": 0.08513,
+      "grad_norm": 1.1188271554621882,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 8513
+    },
+    {
+      "epoch": 0.08514,
+      "grad_norm": 0.9977123655522062,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 8514
+    },
+    {
+      "epoch": 0.08515,
+      "grad_norm": 1.0545034904025057,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 8515
+    },
+    {
+      "epoch": 0.08516,
+      "grad_norm": 1.2561317636760059,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 8516
+    },
+    {
+      "epoch": 0.08517,
+      "grad_norm": 1.015842099395916,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 8517
+    },
+    {
+      "epoch": 0.08518,
+      "grad_norm": 1.1154744018152205,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 8518
+    },
+    {
+      "epoch": 0.08519,
+      "grad_norm": 0.9308156900461738,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 8519
+    },
+    {
+      "epoch": 0.0852,
+      "grad_norm": 1.0123290179587556,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 8520
+    },
+    {
+      "epoch": 0.08521,
+      "grad_norm": 0.9696253911148752,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 8521
+    },
+    {
+      "epoch": 0.08522,
+      "grad_norm": 1.0651444858320227,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 8522
+    },
+    {
+      "epoch": 0.08523,
+      "grad_norm": 1.221416691858745,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 8523
+    },
+    {
+      "epoch": 0.08524,
+      "grad_norm": 1.0694054720010744,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 8524
+    },
+    {
+      "epoch": 0.08525,
+      "grad_norm": 1.206105613858707,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 8525
+    },
+    {
+      "epoch": 0.08526,
+      "grad_norm": 1.125746365698348,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 8526
+    },
+    {
+      "epoch": 0.08527,
+      "grad_norm": 1.021213535127077,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 8527
+    },
+    {
+      "epoch": 0.08528,
+      "grad_norm": 1.0514486808333348,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 8528
+    },
+    {
+      "epoch": 0.08529,
+      "grad_norm": 0.8891881765729944,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 8529
+    },
+    {
+      "epoch": 0.0853,
+      "grad_norm": 1.0090942600896682,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 8530
+    },
+    {
+      "epoch": 0.08531,
+      "grad_norm": 1.077165689433922,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 8531
+    },
+    {
+      "epoch": 0.08532,
+      "grad_norm": 0.8886817388250743,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 8532
+    },
+    {
+      "epoch": 0.08533,
+      "grad_norm": 0.9499157917354463,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 8533
+    },
+    {
+      "epoch": 0.08534,
+      "grad_norm": 1.0462406691660049,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 8534
+    },
+    {
+      "epoch": 0.08535,
+      "grad_norm": 1.0427155178314294,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 8535
+    },
+    {
+      "epoch": 0.08536,
+      "grad_norm": 1.3345887932955967,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 8536
+    },
+    {
+      "epoch": 0.08537,
+      "grad_norm": 1.1424324189819963,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 8537
+    },
+    {
+      "epoch": 0.08538,
+      "grad_norm": 1.1087130620488592,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 8538
+    },
+    {
+      "epoch": 0.08539,
+      "grad_norm": 0.9423257796614134,
+      "learning_rate": 0.003,
+      "loss": 4.06,
+      "step": 8539
+    },
+    {
+      "epoch": 0.0854,
+      "grad_norm": 0.9989943262329959,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 8540
+    },
+    {
+      "epoch": 0.08541,
+      "grad_norm": 1.1159602342043562,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 8541
+    },
+    {
+      "epoch": 0.08542,
+      "grad_norm": 1.1750101404966282,
+      "learning_rate": 0.003,
+      "loss": 4.0604,
+      "step": 8542
+    },
+    {
+      "epoch": 0.08543,
+      "grad_norm": 1.1097646268558357,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 8543
+    },
+    {
+      "epoch": 0.08544,
+      "grad_norm": 1.2246197163952683,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 8544
+    },
+    {
+      "epoch": 0.08545,
+      "grad_norm": 0.9006431906440999,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 8545
+    },
+    {
+      "epoch": 0.08546,
+      "grad_norm": 0.8535243027708214,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 8546
+    },
+    {
+      "epoch": 0.08547,
+      "grad_norm": 0.9091940967690285,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 8547
+    },
+    {
+      "epoch": 0.08548,
+      "grad_norm": 1.1204422349465504,
+      "learning_rate": 0.003,
+      "loss": 4.0582,
+      "step": 8548
+    },
+    {
+      "epoch": 0.08549,
+      "grad_norm": 1.1376178894662337,
+      "learning_rate": 0.003,
+      "loss": 4.0538,
+      "step": 8549
+    },
+    {
+      "epoch": 0.0855,
+      "grad_norm": 0.9004834981881831,
+      "learning_rate": 0.003,
+      "loss": 4.0591,
+      "step": 8550
+    },
+    {
+      "epoch": 0.08551,
+      "grad_norm": 1.0908856802279696,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 8551
+    },
+    {
+      "epoch": 0.08552,
+      "grad_norm": 1.0747606537103462,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 8552
+    },
+    {
+      "epoch": 0.08553,
+      "grad_norm": 1.1824536376723846,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 8553
+    },
+    {
+      "epoch": 0.08554,
+      "grad_norm": 0.9327035296984333,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 8554
+    },
+    {
+      "epoch": 0.08555,
+      "grad_norm": 1.0845301764121567,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 8555
+    },
+    {
+      "epoch": 0.08556,
+      "grad_norm": 1.0781500615217907,
+      "learning_rate": 0.003,
+      "loss": 4.0524,
+      "step": 8556
+    },
+    {
+      "epoch": 0.08557,
+      "grad_norm": 1.1114030326740636,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 8557
+    },
+    {
+      "epoch": 0.08558,
+      "grad_norm": 1.1387705031741684,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 8558
+    },
+    {
+      "epoch": 0.08559,
+      "grad_norm": 1.0373792894377512,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 8559
+    },
+    {
+      "epoch": 0.0856,
+      "grad_norm": 1.205780356820795,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 8560
+    },
+    {
+      "epoch": 0.08561,
+      "grad_norm": 0.8516562843948833,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 8561
+    },
+    {
+      "epoch": 0.08562,
+      "grad_norm": 0.933449495259518,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 8562
+    },
+    {
+      "epoch": 0.08563,
+      "grad_norm": 1.0726820040186111,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 8563
+    },
+    {
+      "epoch": 0.08564,
+      "grad_norm": 1.0347716349195137,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 8564
+    },
+    {
+      "epoch": 0.08565,
+      "grad_norm": 1.0186874520180855,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 8565
+    },
+    {
+      "epoch": 0.08566,
+      "grad_norm": 1.1233566323209978,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 8566
+    },
+    {
+      "epoch": 0.08567,
+      "grad_norm": 1.1961057328383637,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 8567
+    },
+    {
+      "epoch": 0.08568,
+      "grad_norm": 1.0511200423129479,
+      "learning_rate": 0.003,
+      "loss": 4.0645,
+      "step": 8568
+    },
+    {
+      "epoch": 0.08569,
+      "grad_norm": 1.1119472613821024,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 8569
+    },
+    {
+      "epoch": 0.0857,
+      "grad_norm": 1.1318905094833156,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 8570
+    },
+    {
+      "epoch": 0.08571,
+      "grad_norm": 1.3705938071159904,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 8571
+    },
+    {
+      "epoch": 0.08572,
+      "grad_norm": 0.8350836435173717,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 8572
+    },
+    {
+      "epoch": 0.08573,
+      "grad_norm": 0.9539575731341017,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 8573
+    },
+    {
+      "epoch": 0.08574,
+      "grad_norm": 1.2227418528249123,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 8574
+    },
+    {
+      "epoch": 0.08575,
+      "grad_norm": 1.0933729376633516,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 8575
+    },
+    {
+      "epoch": 0.08576,
+      "grad_norm": 1.011476248931269,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 8576
+    },
+    {
+      "epoch": 0.08577,
+      "grad_norm": 0.9840937697692118,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 8577
+    },
+    {
+      "epoch": 0.08578,
+      "grad_norm": 1.202980530727398,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 8578
+    },
+    {
+      "epoch": 0.08579,
+      "grad_norm": 0.9372317642711228,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 8579
+    },
+    {
+      "epoch": 0.0858,
+      "grad_norm": 1.1233693209921423,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 8580
+    },
+    {
+      "epoch": 0.08581,
+      "grad_norm": 1.2444195714201942,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 8581
+    },
+    {
+      "epoch": 0.08582,
+      "grad_norm": 1.1497113076403636,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 8582
+    },
+    {
+      "epoch": 0.08583,
+      "grad_norm": 0.8869126729713231,
+      "learning_rate": 0.003,
+      "loss": 4.0611,
+      "step": 8583
+    },
+    {
+      "epoch": 0.08584,
+      "grad_norm": 0.9033828604652565,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 8584
+    },
+    {
+      "epoch": 0.08585,
+      "grad_norm": 1.2413242779151106,
+      "learning_rate": 0.003,
+      "loss": 4.0606,
+      "step": 8585
+    },
+    {
+      "epoch": 0.08586,
+      "grad_norm": 0.8785419954113871,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 8586
+    },
+    {
+      "epoch": 0.08587,
+      "grad_norm": 1.0721276325176368,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 8587
+    },
+    {
+      "epoch": 0.08588,
+      "grad_norm": 1.1679761583906054,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 8588
+    },
+    {
+      "epoch": 0.08589,
+      "grad_norm": 1.080344129233115,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 8589
+    },
+    {
+      "epoch": 0.0859,
+      "grad_norm": 0.9870833943241247,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 8590
+    },
+    {
+      "epoch": 0.08591,
+      "grad_norm": 1.034639577282747,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 8591
+    },
+    {
+      "epoch": 0.08592,
+      "grad_norm": 1.0710719468181558,
+      "learning_rate": 0.003,
+      "loss": 4.0669,
+      "step": 8592
+    },
+    {
+      "epoch": 0.08593,
+      "grad_norm": 0.9888408563640142,
+      "learning_rate": 0.003,
+      "loss": 4.0803,
+      "step": 8593
+    },
+    {
+      "epoch": 0.08594,
+      "grad_norm": 1.2453584999330138,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 8594
+    },
+    {
+      "epoch": 0.08595,
+      "grad_norm": 1.0196678830693755,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 8595
+    },
+    {
+      "epoch": 0.08596,
+      "grad_norm": 1.1945792959997095,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 8596
+    },
+    {
+      "epoch": 0.08597,
+      "grad_norm": 1.1530581283872166,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 8597
+    },
+    {
+      "epoch": 0.08598,
+      "grad_norm": 1.3926960034791933,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 8598
+    },
+    {
+      "epoch": 0.08599,
+      "grad_norm": 0.826192764112842,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 8599
+    },
+    {
+      "epoch": 0.086,
+      "grad_norm": 1.06077334266566,
+      "learning_rate": 0.003,
+      "loss": 4.0737,
+      "step": 8600
+    },
+    {
+      "epoch": 0.08601,
+      "grad_norm": 1.1871115259087262,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 8601
+    },
+    {
+      "epoch": 0.08602,
+      "grad_norm": 1.2181866405394508,
+      "learning_rate": 0.003,
+      "loss": 4.0713,
+      "step": 8602
+    },
+    {
+      "epoch": 0.08603,
+      "grad_norm": 1.0883340096060563,
+      "learning_rate": 0.003,
+      "loss": 4.0617,
+      "step": 8603
+    },
+    {
+      "epoch": 0.08604,
+      "grad_norm": 0.9503342506965254,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 8604
+    },
+    {
+      "epoch": 0.08605,
+      "grad_norm": 1.2172592642031064,
+      "learning_rate": 0.003,
+      "loss": 4.0575,
+      "step": 8605
+    },
+    {
+      "epoch": 0.08606,
+      "grad_norm": 1.136390303673651,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 8606
+    },
+    {
+      "epoch": 0.08607,
+      "grad_norm": 1.1355782479254777,
+      "learning_rate": 0.003,
+      "loss": 4.0685,
+      "step": 8607
+    },
+    {
+      "epoch": 0.08608,
+      "grad_norm": 1.0517606318333685,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 8608
+    },
+    {
+      "epoch": 0.08609,
+      "grad_norm": 0.9420025556721784,
+      "learning_rate": 0.003,
+      "loss": 4.0599,
+      "step": 8609
+    },
+    {
+      "epoch": 0.0861,
+      "grad_norm": 0.9141311653122305,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 8610
+    },
+    {
+      "epoch": 0.08611,
+      "grad_norm": 1.0548509864539932,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 8611
+    },
+    {
+      "epoch": 0.08612,
+      "grad_norm": 1.001800534694237,
+      "learning_rate": 0.003,
+      "loss": 4.0761,
+      "step": 8612
+    },
+    {
+      "epoch": 0.08613,
+      "grad_norm": 1.0972442988257152,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 8613
+    },
+    {
+      "epoch": 0.08614,
+      "grad_norm": 0.9428815080476953,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 8614
+    },
+    {
+      "epoch": 0.08615,
+      "grad_norm": 1.0484637576712974,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 8615
+    },
+    {
+      "epoch": 0.08616,
+      "grad_norm": 1.0917980757047034,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 8616
+    },
+    {
+      "epoch": 0.08617,
+      "grad_norm": 1.2097536460049183,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 8617
+    },
+    {
+      "epoch": 0.08618,
+      "grad_norm": 1.3562182619090937,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 8618
+    },
+    {
+      "epoch": 0.08619,
+      "grad_norm": 0.9246195111624731,
+      "learning_rate": 0.003,
+      "loss": 4.0639,
+      "step": 8619
+    },
+    {
+      "epoch": 0.0862,
+      "grad_norm": 0.9824968489316007,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 8620
+    },
+    {
+      "epoch": 0.08621,
+      "grad_norm": 1.0258471276787318,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 8621
+    },
+    {
+      "epoch": 0.08622,
+      "grad_norm": 1.183578647619695,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 8622
+    },
+    {
+      "epoch": 0.08623,
+      "grad_norm": 1.1381975301836866,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 8623
+    },
+    {
+      "epoch": 0.08624,
+      "grad_norm": 1.1630194311964601,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 8624
+    },
+    {
+      "epoch": 0.08625,
+      "grad_norm": 0.9306587451164217,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 8625
+    },
+    {
+      "epoch": 0.08626,
+      "grad_norm": 1.0498514896735527,
+      "learning_rate": 0.003,
+      "loss": 4.0652,
+      "step": 8626
+    },
+    {
+      "epoch": 0.08627,
+      "grad_norm": 1.1755016236915463,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 8627
+    },
+    {
+      "epoch": 0.08628,
+      "grad_norm": 0.938529239831759,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 8628
+    },
+    {
+      "epoch": 0.08629,
+      "grad_norm": 1.0144244468682728,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 8629
+    },
+    {
+      "epoch": 0.0863,
+      "grad_norm": 1.113738693838708,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 8630
+    },
+    {
+      "epoch": 0.08631,
+      "grad_norm": 1.1256675059051566,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 8631
+    },
+    {
+      "epoch": 0.08632,
+      "grad_norm": 0.8974862907760961,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 8632
+    },
+    {
+      "epoch": 0.08633,
+      "grad_norm": 1.0495156855470718,
+      "learning_rate": 0.003,
+      "loss": 4.0698,
+      "step": 8633
+    },
+    {
+      "epoch": 0.08634,
+      "grad_norm": 1.2948675647945798,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 8634
+    },
+    {
+      "epoch": 0.08635,
+      "grad_norm": 1.0184506641521571,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 8635
+    },
+    {
+      "epoch": 0.08636,
+      "grad_norm": 1.368584927067475,
+      "learning_rate": 0.003,
+      "loss": 4.0598,
+      "step": 8636
+    },
+    {
+      "epoch": 0.08637,
+      "grad_norm": 0.9162061134202947,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 8637
+    },
+    {
+      "epoch": 0.08638,
+      "grad_norm": 1.0811639530557986,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 8638
+    },
+    {
+      "epoch": 0.08639,
+      "grad_norm": 0.9376836857950404,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 8639
+    },
+    {
+      "epoch": 0.0864,
+      "grad_norm": 1.0873412665068496,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 8640
+    },
+    {
+      "epoch": 0.08641,
+      "grad_norm": 1.1255192902825057,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 8641
+    },
+    {
+      "epoch": 0.08642,
+      "grad_norm": 1.1145569844437186,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 8642
+    },
+    {
+      "epoch": 0.08643,
+      "grad_norm": 1.4460777384526706,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 8643
+    },
+    {
+      "epoch": 0.08644,
+      "grad_norm": 0.9245977533541625,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 8644
+    },
+    {
+      "epoch": 0.08645,
+      "grad_norm": 1.0181172313173914,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 8645
+    },
+    {
+      "epoch": 0.08646,
+      "grad_norm": 1.1454056707809717,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 8646
+    },
+    {
+      "epoch": 0.08647,
+      "grad_norm": 1.0065775077778298,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 8647
+    },
+    {
+      "epoch": 0.08648,
+      "grad_norm": 1.2202424385911437,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 8648
+    },
+    {
+      "epoch": 0.08649,
+      "grad_norm": 0.9257171603914495,
+      "learning_rate": 0.003,
+      "loss": 4.0684,
+      "step": 8649
+    },
+    {
+      "epoch": 0.0865,
+      "grad_norm": 0.9390491681338301,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 8650
+    },
+    {
+      "epoch": 0.08651,
+      "grad_norm": 1.1931127237305668,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 8651
+    },
+    {
+      "epoch": 0.08652,
+      "grad_norm": 0.9274015309476554,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 8652
+    },
+    {
+      "epoch": 0.08653,
+      "grad_norm": 1.02289631221439,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 8653
+    },
+    {
+      "epoch": 0.08654,
+      "grad_norm": 1.3262485339916,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 8654
+    },
+    {
+      "epoch": 0.08655,
+      "grad_norm": 1.2428022427956695,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 8655
+    },
+    {
+      "epoch": 0.08656,
+      "grad_norm": 1.031840617295688,
+      "learning_rate": 0.003,
+      "loss": 4.0575,
+      "step": 8656
+    },
+    {
+      "epoch": 0.08657,
+      "grad_norm": 1.325567296493681,
+      "learning_rate": 0.003,
+      "loss": 4.076,
+      "step": 8657
+    },
+    {
+      "epoch": 0.08658,
+      "grad_norm": 1.0189750997869491,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 8658
+    },
+    {
+      "epoch": 0.08659,
+      "grad_norm": 1.209380922792147,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 8659
+    },
+    {
+      "epoch": 0.0866,
+      "grad_norm": 1.0610985840216802,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 8660
+    },
+    {
+      "epoch": 0.08661,
+      "grad_norm": 1.0561305118599713,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 8661
+    },
+    {
+      "epoch": 0.08662,
+      "grad_norm": 1.0679146127811068,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 8662
+    },
+    {
+      "epoch": 0.08663,
+      "grad_norm": 0.9064928152737026,
+      "learning_rate": 0.003,
+      "loss": 4.0544,
+      "step": 8663
+    },
+    {
+      "epoch": 0.08664,
+      "grad_norm": 1.0923363230039982,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 8664
+    },
+    {
+      "epoch": 0.08665,
+      "grad_norm": 1.1988464238998455,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 8665
+    },
+    {
+      "epoch": 0.08666,
+      "grad_norm": 1.0248023151858396,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 8666
+    },
+    {
+      "epoch": 0.08667,
+      "grad_norm": 1.1148548362397608,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 8667
+    },
+    {
+      "epoch": 0.08668,
+      "grad_norm": 1.0412822855875483,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 8668
+    },
+    {
+      "epoch": 0.08669,
+      "grad_norm": 1.2096435963768866,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 8669
+    },
+    {
+      "epoch": 0.0867,
+      "grad_norm": 1.123266840921136,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 8670
+    },
+    {
+      "epoch": 0.08671,
+      "grad_norm": 1.0877613190872542,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 8671
+    },
+    {
+      "epoch": 0.08672,
+      "grad_norm": 1.1406500907143475,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 8672
+    },
+    {
+      "epoch": 0.08673,
+      "grad_norm": 0.9398367617096632,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 8673
+    },
+    {
+      "epoch": 0.08674,
+      "grad_norm": 1.0253292803580725,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 8674
+    },
+    {
+      "epoch": 0.08675,
+      "grad_norm": 1.1953506016711317,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 8675
+    },
+    {
+      "epoch": 0.08676,
+      "grad_norm": 0.9488718707211112,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 8676
+    },
+    {
+      "epoch": 0.08677,
+      "grad_norm": 1.0431682629276453,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 8677
+    },
+    {
+      "epoch": 0.08678,
+      "grad_norm": 1.193005747951154,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 8678
+    },
+    {
+      "epoch": 0.08679,
+      "grad_norm": 1.0144417828651375,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 8679
+    },
+    {
+      "epoch": 0.0868,
+      "grad_norm": 1.1348128884099915,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 8680
+    },
+    {
+      "epoch": 0.08681,
+      "grad_norm": 1.1097911864091226,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 8681
+    },
+    {
+      "epoch": 0.08682,
+      "grad_norm": 1.0697846104835937,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 8682
+    },
+    {
+      "epoch": 0.08683,
+      "grad_norm": 1.0607468846889823,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 8683
+    },
+    {
+      "epoch": 0.08684,
+      "grad_norm": 1.0862632076397618,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 8684
+    },
+    {
+      "epoch": 0.08685,
+      "grad_norm": 1.1148108606990261,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 8685
+    },
+    {
+      "epoch": 0.08686,
+      "grad_norm": 1.0892861530880062,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 8686
+    },
+    {
+      "epoch": 0.08687,
+      "grad_norm": 1.2033955582498337,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 8687
+    },
+    {
+      "epoch": 0.08688,
+      "grad_norm": 0.9860469350967052,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 8688
+    },
+    {
+      "epoch": 0.08689,
+      "grad_norm": 0.887845078826922,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 8689
+    },
+    {
+      "epoch": 0.0869,
+      "grad_norm": 0.981947563992108,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 8690
+    },
+    {
+      "epoch": 0.08691,
+      "grad_norm": 1.2386584406913188,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 8691
+    },
+    {
+      "epoch": 0.08692,
+      "grad_norm": 1.1214274326159783,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 8692
+    },
+    {
+      "epoch": 0.08693,
+      "grad_norm": 0.9759935969342685,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 8693
+    },
+    {
+      "epoch": 0.08694,
+      "grad_norm": 0.9952282248413553,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 8694
+    },
+    {
+      "epoch": 0.08695,
+      "grad_norm": 1.2054815926609244,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 8695
+    },
+    {
+      "epoch": 0.08696,
+      "grad_norm": 0.9636503750723039,
+      "learning_rate": 0.003,
+      "loss": 4.0591,
+      "step": 8696
+    },
+    {
+      "epoch": 0.08697,
+      "grad_norm": 1.0925520713595345,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 8697
+    },
+    {
+      "epoch": 0.08698,
+      "grad_norm": 0.8854366350443418,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 8698
+    },
+    {
+      "epoch": 0.08699,
+      "grad_norm": 0.9842767035973804,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 8699
+    },
+    {
+      "epoch": 0.087,
+      "grad_norm": 1.1497030489175517,
+      "learning_rate": 0.003,
+      "loss": 4.0692,
+      "step": 8700
+    },
+    {
+      "epoch": 0.08701,
+      "grad_norm": 1.220812201035816,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 8701
+    },
+    {
+      "epoch": 0.08702,
+      "grad_norm": 1.0247846783178878,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 8702
+    },
+    {
+      "epoch": 0.08703,
+      "grad_norm": 1.0010386929193278,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 8703
+    },
+    {
+      "epoch": 0.08704,
+      "grad_norm": 1.0616711530427767,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 8704
+    },
+    {
+      "epoch": 0.08705,
+      "grad_norm": 0.9976320350325579,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 8705
+    },
+    {
+      "epoch": 0.08706,
+      "grad_norm": 1.1942693214140914,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 8706
+    },
+    {
+      "epoch": 0.08707,
+      "grad_norm": 1.073245473169283,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 8707
+    },
+    {
+      "epoch": 0.08708,
+      "grad_norm": 1.1941251771612393,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 8708
+    },
+    {
+      "epoch": 0.08709,
+      "grad_norm": 1.054313122339438,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 8709
+    },
+    {
+      "epoch": 0.0871,
+      "grad_norm": 1.1735283159957155,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 8710
+    },
+    {
+      "epoch": 0.08711,
+      "grad_norm": 1.224731249555308,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 8711
+    },
+    {
+      "epoch": 0.08712,
+      "grad_norm": 1.1071733779330812,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 8712
+    },
+    {
+      "epoch": 0.08713,
+      "grad_norm": 1.207587077778471,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 8713
+    },
+    {
+      "epoch": 0.08714,
+      "grad_norm": 1.035590781407661,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 8714
+    },
+    {
+      "epoch": 0.08715,
+      "grad_norm": 0.9210397372280619,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 8715
+    },
+    {
+      "epoch": 0.08716,
+      "grad_norm": 1.0736853335371308,
+      "learning_rate": 0.003,
+      "loss": 4.0514,
+      "step": 8716
+    },
+    {
+      "epoch": 0.08717,
+      "grad_norm": 1.2449278196726106,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 8717
+    },
+    {
+      "epoch": 0.08718,
+      "grad_norm": 0.9356165132727563,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 8718
+    },
+    {
+      "epoch": 0.08719,
+      "grad_norm": 1.1410701425015397,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 8719
+    },
+    {
+      "epoch": 0.0872,
+      "grad_norm": 1.0473904852864253,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 8720
+    },
+    {
+      "epoch": 0.08721,
+      "grad_norm": 1.1346851924291141,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 8721
+    },
+    {
+      "epoch": 0.08722,
+      "grad_norm": 0.9947376087216057,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 8722
+    },
+    {
+      "epoch": 0.08723,
+      "grad_norm": 1.2565032752641185,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 8723
+    },
+    {
+      "epoch": 0.08724,
+      "grad_norm": 1.013947154344129,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 8724
+    },
+    {
+      "epoch": 0.08725,
+      "grad_norm": 1.2550507340568873,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 8725
+    },
+    {
+      "epoch": 0.08726,
+      "grad_norm": 1.1552353519239422,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 8726
+    },
+    {
+      "epoch": 0.08727,
+      "grad_norm": 1.0836695734061212,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 8727
+    },
+    {
+      "epoch": 0.08728,
+      "grad_norm": 0.9999247167010374,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 8728
+    },
+    {
+      "epoch": 0.08729,
+      "grad_norm": 1.142902216241174,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 8729
+    },
+    {
+      "epoch": 0.0873,
+      "grad_norm": 1.091022164759464,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 8730
+    },
+    {
+      "epoch": 0.08731,
+      "grad_norm": 1.0210290074405686,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 8731
+    },
+    {
+      "epoch": 0.08732,
+      "grad_norm": 1.1449497076583306,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 8732
+    },
+    {
+      "epoch": 0.08733,
+      "grad_norm": 1.0036816386493732,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 8733
+    },
+    {
+      "epoch": 0.08734,
+      "grad_norm": 1.3034518911251176,
+      "learning_rate": 0.003,
+      "loss": 4.0705,
+      "step": 8734
+    },
+    {
+      "epoch": 0.08735,
+      "grad_norm": 1.035050110835489,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 8735
+    },
+    {
+      "epoch": 0.08736,
+      "grad_norm": 1.059239037825648,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 8736
+    },
+    {
+      "epoch": 0.08737,
+      "grad_norm": 1.1209703870238803,
+      "learning_rate": 0.003,
+      "loss": 4.0706,
+      "step": 8737
+    },
+    {
+      "epoch": 0.08738,
+      "grad_norm": 0.897095918930477,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 8738
+    },
+    {
+      "epoch": 0.08739,
+      "grad_norm": 0.9994054796041234,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 8739
+    },
+    {
+      "epoch": 0.0874,
+      "grad_norm": 1.3452908356127442,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 8740
+    },
+    {
+      "epoch": 0.08741,
+      "grad_norm": 1.1093603042004494,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 8741
+    },
+    {
+      "epoch": 0.08742,
+      "grad_norm": 0.9596481129611945,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 8742
+    },
+    {
+      "epoch": 0.08743,
+      "grad_norm": 1.0733682110935197,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 8743
+    },
+    {
+      "epoch": 0.08744,
+      "grad_norm": 1.212621060059208,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 8744
+    },
+    {
+      "epoch": 0.08745,
+      "grad_norm": 1.0255959780393786,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 8745
+    },
+    {
+      "epoch": 0.08746,
+      "grad_norm": 1.111182154226899,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 8746
+    },
+    {
+      "epoch": 0.08747,
+      "grad_norm": 1.0813261845403195,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 8747
+    },
+    {
+      "epoch": 0.08748,
+      "grad_norm": 1.2389683997355077,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 8748
+    },
+    {
+      "epoch": 0.08749,
+      "grad_norm": 1.1646233630246605,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 8749
+    },
+    {
+      "epoch": 0.0875,
+      "grad_norm": 0.9882478074471066,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 8750
+    },
+    {
+      "epoch": 0.08751,
+      "grad_norm": 1.0628828379350737,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 8751
+    },
+    {
+      "epoch": 0.08752,
+      "grad_norm": 1.0984158397543857,
+      "learning_rate": 0.003,
+      "loss": 4.0703,
+      "step": 8752
+    },
+    {
+      "epoch": 0.08753,
+      "grad_norm": 1.2193128582910855,
+      "learning_rate": 0.003,
+      "loss": 4.0738,
+      "step": 8753
+    },
+    {
+      "epoch": 0.08754,
+      "grad_norm": 1.2385678850122033,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 8754
+    },
+    {
+      "epoch": 0.08755,
+      "grad_norm": 0.9211225646462056,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 8755
+    },
+    {
+      "epoch": 0.08756,
+      "grad_norm": 1.0770635030360587,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 8756
+    },
+    {
+      "epoch": 0.08757,
+      "grad_norm": 1.1930560657485945,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 8757
+    },
+    {
+      "epoch": 0.08758,
+      "grad_norm": 1.1384112467498324,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 8758
+    },
+    {
+      "epoch": 0.08759,
+      "grad_norm": 1.1172445551058783,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 8759
+    },
+    {
+      "epoch": 0.0876,
+      "grad_norm": 1.1801597872088998,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 8760
+    },
+    {
+      "epoch": 0.08761,
+      "grad_norm": 0.9352294073790094,
+      "learning_rate": 0.003,
+      "loss": 4.0692,
+      "step": 8761
+    },
+    {
+      "epoch": 0.08762,
+      "grad_norm": 0.8823537867998336,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 8762
+    },
+    {
+      "epoch": 0.08763,
+      "grad_norm": 1.0898587675324372,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 8763
+    },
+    {
+      "epoch": 0.08764,
+      "grad_norm": 0.9778108833971553,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 8764
+    },
+    {
+      "epoch": 0.08765,
+      "grad_norm": 1.1127307572804943,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 8765
+    },
+    {
+      "epoch": 0.08766,
+      "grad_norm": 1.064542658962637,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 8766
+    },
+    {
+      "epoch": 0.08767,
+      "grad_norm": 1.2769881975725046,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 8767
+    },
+    {
+      "epoch": 0.08768,
+      "grad_norm": 0.9716458694094685,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 8768
+    },
+    {
+      "epoch": 0.08769,
+      "grad_norm": 1.02263370249674,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 8769
+    },
+    {
+      "epoch": 0.0877,
+      "grad_norm": 1.1197891339803154,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 8770
+    },
+    {
+      "epoch": 0.08771,
+      "grad_norm": 1.0343547006779303,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 8771
+    },
+    {
+      "epoch": 0.08772,
+      "grad_norm": 1.1391338162247877,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 8772
+    },
+    {
+      "epoch": 0.08773,
+      "grad_norm": 1.133183973465544,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 8773
+    },
+    {
+      "epoch": 0.08774,
+      "grad_norm": 0.9767687479388363,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 8774
+    },
+    {
+      "epoch": 0.08775,
+      "grad_norm": 1.0614747177631898,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 8775
+    },
+    {
+      "epoch": 0.08776,
+      "grad_norm": 1.2783088947042238,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 8776
+    },
+    {
+      "epoch": 0.08777,
+      "grad_norm": 0.7758444140217153,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 8777
+    },
+    {
+      "epoch": 0.08778,
+      "grad_norm": 0.933691111864922,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 8778
+    },
+    {
+      "epoch": 0.08779,
+      "grad_norm": 1.1241941614649351,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 8779
+    },
+    {
+      "epoch": 0.0878,
+      "grad_norm": 1.2838395411494257,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 8780
+    },
+    {
+      "epoch": 0.08781,
+      "grad_norm": 1.0245994272171686,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 8781
+    },
+    {
+      "epoch": 0.08782,
+      "grad_norm": 1.166015780846121,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 8782
+    },
+    {
+      "epoch": 0.08783,
+      "grad_norm": 0.94136343088169,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 8783
+    },
+    {
+      "epoch": 0.08784,
+      "grad_norm": 1.0537644571277438,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 8784
+    },
+    {
+      "epoch": 0.08785,
+      "grad_norm": 1.188275684770546,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 8785
+    },
+    {
+      "epoch": 0.08786,
+      "grad_norm": 0.9863606748261747,
+      "learning_rate": 0.003,
+      "loss": 4.0597,
+      "step": 8786
+    },
+    {
+      "epoch": 0.08787,
+      "grad_norm": 1.128286694821681,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 8787
+    },
+    {
+      "epoch": 0.08788,
+      "grad_norm": 1.079026401112349,
+      "learning_rate": 0.003,
+      "loss": 4.0606,
+      "step": 8788
+    },
+    {
+      "epoch": 0.08789,
+      "grad_norm": 1.1880750581034827,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 8789
+    },
+    {
+      "epoch": 0.0879,
+      "grad_norm": 1.0176004388763833,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 8790
+    },
+    {
+      "epoch": 0.08791,
+      "grad_norm": 1.2038507485153178,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 8791
+    },
+    {
+      "epoch": 0.08792,
+      "grad_norm": 0.9980217290009189,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 8792
+    },
+    {
+      "epoch": 0.08793,
+      "grad_norm": 1.1566691894267236,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 8793
+    },
+    {
+      "epoch": 0.08794,
+      "grad_norm": 1.0311955693118802,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 8794
+    },
+    {
+      "epoch": 0.08795,
+      "grad_norm": 1.0735551697097647,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 8795
+    },
+    {
+      "epoch": 0.08796,
+      "grad_norm": 1.2673748855568097,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 8796
+    },
+    {
+      "epoch": 0.08797,
+      "grad_norm": 1.0868436946789142,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 8797
+    },
+    {
+      "epoch": 0.08798,
+      "grad_norm": 1.042119281795597,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 8798
+    },
+    {
+      "epoch": 0.08799,
+      "grad_norm": 1.0602674706510509,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 8799
+    },
+    {
+      "epoch": 0.088,
+      "grad_norm": 1.008077319870118,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 8800
+    },
+    {
+      "epoch": 0.08801,
+      "grad_norm": 1.0969522818507715,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 8801
+    },
+    {
+      "epoch": 0.08802,
+      "grad_norm": 1.1376221068560126,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 8802
+    },
+    {
+      "epoch": 0.08803,
+      "grad_norm": 1.0879978004992539,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 8803
+    },
+    {
+      "epoch": 0.08804,
+      "grad_norm": 1.1606225965715604,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 8804
+    },
+    {
+      "epoch": 0.08805,
+      "grad_norm": 0.966413249750066,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 8805
+    },
+    {
+      "epoch": 0.08806,
+      "grad_norm": 1.178861743093662,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 8806
+    },
+    {
+      "epoch": 0.08807,
+      "grad_norm": 0.9970083203505135,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 8807
+    },
+    {
+      "epoch": 0.08808,
+      "grad_norm": 1.148232319300287,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 8808
+    },
+    {
+      "epoch": 0.08809,
+      "grad_norm": 1.02006120805353,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 8809
+    },
+    {
+      "epoch": 0.0881,
+      "grad_norm": 1.2541318447732,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 8810
+    },
+    {
+      "epoch": 0.08811,
+      "grad_norm": 0.9654813555045425,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 8811
+    },
+    {
+      "epoch": 0.08812,
+      "grad_norm": 1.0815748935370195,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 8812
+    },
+    {
+      "epoch": 0.08813,
+      "grad_norm": 1.2017717029030808,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 8813
+    },
+    {
+      "epoch": 0.08814,
+      "grad_norm": 1.061652224882425,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 8814
+    },
+    {
+      "epoch": 0.08815,
+      "grad_norm": 1.1941981502914418,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 8815
+    },
+    {
+      "epoch": 0.08816,
+      "grad_norm": 0.9722887512032639,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 8816
+    },
+    {
+      "epoch": 0.08817,
+      "grad_norm": 1.1137225093485594,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 8817
+    },
+    {
+      "epoch": 0.08818,
+      "grad_norm": 0.9632280469657819,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 8818
+    },
+    {
+      "epoch": 0.08819,
+      "grad_norm": 1.2912552481164463,
+      "learning_rate": 0.003,
+      "loss": 4.0728,
+      "step": 8819
+    },
+    {
+      "epoch": 0.0882,
+      "grad_norm": 1.2656702522604,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 8820
+    },
+    {
+      "epoch": 0.08821,
+      "grad_norm": 0.8641185704105159,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 8821
+    },
+    {
+      "epoch": 0.08822,
+      "grad_norm": 0.8526858641972325,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 8822
+    },
+    {
+      "epoch": 0.08823,
+      "grad_norm": 0.9761138588500732,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 8823
+    },
+    {
+      "epoch": 0.08824,
+      "grad_norm": 0.9990171985622588,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 8824
+    },
+    {
+      "epoch": 0.08825,
+      "grad_norm": 0.9853141720864859,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 8825
+    },
+    {
+      "epoch": 0.08826,
+      "grad_norm": 1.216924716523876,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 8826
+    },
+    {
+      "epoch": 0.08827,
+      "grad_norm": 1.135457838710045,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 8827
+    },
+    {
+      "epoch": 0.08828,
+      "grad_norm": 0.9724238442453864,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 8828
+    },
+    {
+      "epoch": 0.08829,
+      "grad_norm": 1.1262292498469912,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 8829
+    },
+    {
+      "epoch": 0.0883,
+      "grad_norm": 0.94574518398273,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 8830
+    },
+    {
+      "epoch": 0.08831,
+      "grad_norm": 1.1897119041270636,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 8831
+    },
+    {
+      "epoch": 0.08832,
+      "grad_norm": 1.2115168343495315,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 8832
+    },
+    {
+      "epoch": 0.08833,
+      "grad_norm": 0.996139762537626,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 8833
+    },
+    {
+      "epoch": 0.08834,
+      "grad_norm": 1.2368135937696196,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 8834
+    },
+    {
+      "epoch": 0.08835,
+      "grad_norm": 0.9570660017751258,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 8835
+    },
+    {
+      "epoch": 0.08836,
+      "grad_norm": 0.9995686259154789,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 8836
+    },
+    {
+      "epoch": 0.08837,
+      "grad_norm": 1.170370927533513,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 8837
+    },
+    {
+      "epoch": 0.08838,
+      "grad_norm": 1.0993199810585577,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 8838
+    },
+    {
+      "epoch": 0.08839,
+      "grad_norm": 1.258419211501553,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 8839
+    },
+    {
+      "epoch": 0.0884,
+      "grad_norm": 0.9274678150150035,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 8840
+    },
+    {
+      "epoch": 0.08841,
+      "grad_norm": 1.055913242044825,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 8841
+    },
+    {
+      "epoch": 0.08842,
+      "grad_norm": 1.1426746785347814,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 8842
+    },
+    {
+      "epoch": 0.08843,
+      "grad_norm": 1.1251979418413638,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 8843
+    },
+    {
+      "epoch": 0.08844,
+      "grad_norm": 1.0852468922230825,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 8844
+    },
+    {
+      "epoch": 0.08845,
+      "grad_norm": 1.1567649884610698,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 8845
+    },
+    {
+      "epoch": 0.08846,
+      "grad_norm": 1.082231327966706,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 8846
+    },
+    {
+      "epoch": 0.08847,
+      "grad_norm": 1.3745652698854023,
+      "learning_rate": 0.003,
+      "loss": 4.0699,
+      "step": 8847
+    },
+    {
+      "epoch": 0.08848,
+      "grad_norm": 0.9014879193366324,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 8848
+    },
+    {
+      "epoch": 0.08849,
+      "grad_norm": 1.3017860095067897,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 8849
+    },
+    {
+      "epoch": 0.0885,
+      "grad_norm": 1.1398535241359604,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 8850
+    },
+    {
+      "epoch": 0.08851,
+      "grad_norm": 1.2652223839390064,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 8851
+    },
+    {
+      "epoch": 0.08852,
+      "grad_norm": 0.8569529658036033,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 8852
+    },
+    {
+      "epoch": 0.08853,
+      "grad_norm": 0.8417432899055782,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 8853
+    },
+    {
+      "epoch": 0.08854,
+      "grad_norm": 0.9651867070757068,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 8854
+    },
+    {
+      "epoch": 0.08855,
+      "grad_norm": 1.024221501922812,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 8855
+    },
+    {
+      "epoch": 0.08856,
+      "grad_norm": 1.1804109249837946,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 8856
+    },
+    {
+      "epoch": 0.08857,
+      "grad_norm": 0.955306519494686,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 8857
+    },
+    {
+      "epoch": 0.08858,
+      "grad_norm": 1.0294596526147064,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 8858
+    },
+    {
+      "epoch": 0.08859,
+      "grad_norm": 1.0845239322361329,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 8859
+    },
+    {
+      "epoch": 0.0886,
+      "grad_norm": 1.0515539677387107,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 8860
+    },
+    {
+      "epoch": 0.08861,
+      "grad_norm": 1.1312815390155653,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 8861
+    },
+    {
+      "epoch": 0.08862,
+      "grad_norm": 1.2225356916127106,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 8862
+    },
+    {
+      "epoch": 0.08863,
+      "grad_norm": 1.1065958969223766,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 8863
+    },
+    {
+      "epoch": 0.08864,
+      "grad_norm": 0.8686988551731697,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 8864
+    },
+    {
+      "epoch": 0.08865,
+      "grad_norm": 0.9406066773821836,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 8865
+    },
+    {
+      "epoch": 0.08866,
+      "grad_norm": 1.1068806392407193,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 8866
+    },
+    {
+      "epoch": 0.08867,
+      "grad_norm": 1.0109355114141556,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 8867
+    },
+    {
+      "epoch": 0.08868,
+      "grad_norm": 1.0297444159445588,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 8868
+    },
+    {
+      "epoch": 0.08869,
+      "grad_norm": 1.1859657375260197,
+      "learning_rate": 0.003,
+      "loss": 4.0894,
+      "step": 8869
+    },
+    {
+      "epoch": 0.0887,
+      "grad_norm": 1.0367364996267392,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 8870
+    },
+    {
+      "epoch": 0.08871,
+      "grad_norm": 1.1770481681438267,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 8871
+    },
+    {
+      "epoch": 0.08872,
+      "grad_norm": 1.055437903261702,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 8872
+    },
+    {
+      "epoch": 0.08873,
+      "grad_norm": 1.1436383402758115,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 8873
+    },
+    {
+      "epoch": 0.08874,
+      "grad_norm": 1.0270645045752425,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 8874
+    },
+    {
+      "epoch": 0.08875,
+      "grad_norm": 1.2691814590014312,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 8875
+    },
+    {
+      "epoch": 0.08876,
+      "grad_norm": 1.0685113087608666,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 8876
+    },
+    {
+      "epoch": 0.08877,
+      "grad_norm": 0.9066843918350342,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 8877
+    },
+    {
+      "epoch": 0.08878,
+      "grad_norm": 1.1136428586139093,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 8878
+    },
+    {
+      "epoch": 0.08879,
+      "grad_norm": 1.1426232349637628,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 8879
+    },
+    {
+      "epoch": 0.0888,
+      "grad_norm": 0.9763661504928037,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 8880
+    },
+    {
+      "epoch": 0.08881,
+      "grad_norm": 0.9760789964859367,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 8881
+    },
+    {
+      "epoch": 0.08882,
+      "grad_norm": 0.993562866244837,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 8882
+    },
+    {
+      "epoch": 0.08883,
+      "grad_norm": 1.1608396003529295,
+      "learning_rate": 0.003,
+      "loss": 4.0876,
+      "step": 8883
+    },
+    {
+      "epoch": 0.08884,
+      "grad_norm": 1.1323551569394956,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 8884
+    },
+    {
+      "epoch": 0.08885,
+      "grad_norm": 1.2349009939188973,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 8885
+    },
+    {
+      "epoch": 0.08886,
+      "grad_norm": 1.1914990238513254,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 8886
+    },
+    {
+      "epoch": 0.08887,
+      "grad_norm": 1.0598046198071127,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 8887
+    },
+    {
+      "epoch": 0.08888,
+      "grad_norm": 1.1625466358037462,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 8888
+    },
+    {
+      "epoch": 0.08889,
+      "grad_norm": 0.9052379335151111,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 8889
+    },
+    {
+      "epoch": 0.0889,
+      "grad_norm": 0.9819634809947653,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 8890
+    },
+    {
+      "epoch": 0.08891,
+      "grad_norm": 1.093718586313721,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 8891
+    },
+    {
+      "epoch": 0.08892,
+      "grad_norm": 1.213161423531245,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 8892
+    },
+    {
+      "epoch": 0.08893,
+      "grad_norm": 1.0485073867558885,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 8893
+    },
+    {
+      "epoch": 0.08894,
+      "grad_norm": 1.2824133272408857,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 8894
+    },
+    {
+      "epoch": 0.08895,
+      "grad_norm": 0.8423439384851096,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 8895
+    },
+    {
+      "epoch": 0.08896,
+      "grad_norm": 1.0799448562495333,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 8896
+    },
+    {
+      "epoch": 0.08897,
+      "grad_norm": 1.4422293408188802,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 8897
+    },
+    {
+      "epoch": 0.08898,
+      "grad_norm": 1.032118344307113,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 8898
+    },
+    {
+      "epoch": 0.08899,
+      "grad_norm": 1.0522805801471267,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 8899
+    },
+    {
+      "epoch": 0.089,
+      "grad_norm": 1.0679360633133137,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 8900
+    },
+    {
+      "epoch": 0.08901,
+      "grad_norm": 1.2000864304391685,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 8901
+    },
+    {
+      "epoch": 0.08902,
+      "grad_norm": 1.0038739754064978,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 8902
+    },
+    {
+      "epoch": 0.08903,
+      "grad_norm": 1.4580752514981392,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 8903
+    },
+    {
+      "epoch": 0.08904,
+      "grad_norm": 0.9501849947581935,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 8904
+    },
+    {
+      "epoch": 0.08905,
+      "grad_norm": 1.0865285261488837,
+      "learning_rate": 0.003,
+      "loss": 4.0578,
+      "step": 8905
+    },
+    {
+      "epoch": 0.08906,
+      "grad_norm": 1.192560315216147,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 8906
+    },
+    {
+      "epoch": 0.08907,
+      "grad_norm": 0.86974732874746,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 8907
+    },
+    {
+      "epoch": 0.08908,
+      "grad_norm": 1.1193535519892115,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 8908
+    },
+    {
+      "epoch": 0.08909,
+      "grad_norm": 1.053123300357203,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 8909
+    },
+    {
+      "epoch": 0.0891,
+      "grad_norm": 1.1339836887698285,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 8910
+    },
+    {
+      "epoch": 0.08911,
+      "grad_norm": 0.9277496841752985,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 8911
+    },
+    {
+      "epoch": 0.08912,
+      "grad_norm": 0.9315539212514233,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 8912
+    },
+    {
+      "epoch": 0.08913,
+      "grad_norm": 1.0576553940280595,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 8913
+    },
+    {
+      "epoch": 0.08914,
+      "grad_norm": 1.1640237850058512,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 8914
+    },
+    {
+      "epoch": 0.08915,
+      "grad_norm": 1.0188304936594577,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 8915
+    },
+    {
+      "epoch": 0.08916,
+      "grad_norm": 1.426269112438244,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 8916
+    },
+    {
+      "epoch": 0.08917,
+      "grad_norm": 1.0546818705564507,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 8917
+    },
+    {
+      "epoch": 0.08918,
+      "grad_norm": 1.2200135436078994,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 8918
+    },
+    {
+      "epoch": 0.08919,
+      "grad_norm": 0.8818184266787755,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 8919
+    },
+    {
+      "epoch": 0.0892,
+      "grad_norm": 0.9755990420691693,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 8920
+    },
+    {
+      "epoch": 0.08921,
+      "grad_norm": 1.2471123516060463,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 8921
+    },
+    {
+      "epoch": 0.08922,
+      "grad_norm": 0.8822858234291155,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 8922
+    },
+    {
+      "epoch": 0.08923,
+      "grad_norm": 0.9810038940078862,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 8923
+    },
+    {
+      "epoch": 0.08924,
+      "grad_norm": 1.2492114317483891,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 8924
+    },
+    {
+      "epoch": 0.08925,
+      "grad_norm": 0.9702532902239348,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 8925
+    },
+    {
+      "epoch": 0.08926,
+      "grad_norm": 1.2387626568378134,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 8926
+    },
+    {
+      "epoch": 0.08927,
+      "grad_norm": 1.0513850982219397,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 8927
+    },
+    {
+      "epoch": 0.08928,
+      "grad_norm": 1.1575708697727565,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 8928
+    },
+    {
+      "epoch": 0.08929,
+      "grad_norm": 1.1942394751331307,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 8929
+    },
+    {
+      "epoch": 0.0893,
+      "grad_norm": 0.9099352819617809,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 8930
+    },
+    {
+      "epoch": 0.08931,
+      "grad_norm": 1.1574700090391592,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 8931
+    },
+    {
+      "epoch": 0.08932,
+      "grad_norm": 1.1712040522318599,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 8932
+    },
+    {
+      "epoch": 0.08933,
+      "grad_norm": 1.1298101748632317,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 8933
+    },
+    {
+      "epoch": 0.08934,
+      "grad_norm": 0.8845864177793066,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 8934
+    },
+    {
+      "epoch": 0.08935,
+      "grad_norm": 0.9728746053736606,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 8935
+    },
+    {
+      "epoch": 0.08936,
+      "grad_norm": 1.1064146202001022,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 8936
+    },
+    {
+      "epoch": 0.08937,
+      "grad_norm": 1.1728943500806064,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 8937
+    },
+    {
+      "epoch": 0.08938,
+      "grad_norm": 1.1024217274450334,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 8938
+    },
+    {
+      "epoch": 0.08939,
+      "grad_norm": 1.0804309602863862,
+      "learning_rate": 0.003,
+      "loss": 4.0694,
+      "step": 8939
+    },
+    {
+      "epoch": 0.0894,
+      "grad_norm": 1.1974313951117248,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 8940
+    },
+    {
+      "epoch": 0.08941,
+      "grad_norm": 1.0472060105935805,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 8941
+    },
+    {
+      "epoch": 0.08942,
+      "grad_norm": 1.1849247191126862,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 8942
+    },
+    {
+      "epoch": 0.08943,
+      "grad_norm": 1.066833548679808,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 8943
+    },
+    {
+      "epoch": 0.08944,
+      "grad_norm": 1.1269339808223942,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 8944
+    },
+    {
+      "epoch": 0.08945,
+      "grad_norm": 1.163904186867617,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 8945
+    },
+    {
+      "epoch": 0.08946,
+      "grad_norm": 1.1313006895031748,
+      "learning_rate": 0.003,
+      "loss": 4.0655,
+      "step": 8946
+    },
+    {
+      "epoch": 0.08947,
+      "grad_norm": 1.2553855338780335,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 8947
+    },
+    {
+      "epoch": 0.08948,
+      "grad_norm": 1.1756270052552653,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 8948
+    },
+    {
+      "epoch": 0.08949,
+      "grad_norm": 0.9764941068571328,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 8949
+    },
+    {
+      "epoch": 0.0895,
+      "grad_norm": 1.0240642198990701,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 8950
+    },
+    {
+      "epoch": 0.08951,
+      "grad_norm": 1.2604446368244644,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 8951
+    },
+    {
+      "epoch": 0.08952,
+      "grad_norm": 1.069105761223158,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 8952
+    },
+    {
+      "epoch": 0.08953,
+      "grad_norm": 1.2897654616156728,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 8953
+    },
+    {
+      "epoch": 0.08954,
+      "grad_norm": 0.8927523209357381,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 8954
+    },
+    {
+      "epoch": 0.08955,
+      "grad_norm": 0.7994347598547118,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 8955
+    },
+    {
+      "epoch": 0.08956,
+      "grad_norm": 0.7721118918818628,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 8956
+    },
+    {
+      "epoch": 0.08957,
+      "grad_norm": 0.888735574661208,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 8957
+    },
+    {
+      "epoch": 0.08958,
+      "grad_norm": 1.011303363408731,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 8958
+    },
+    {
+      "epoch": 0.08959,
+      "grad_norm": 1.0580277710381667,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 8959
+    },
+    {
+      "epoch": 0.0896,
+      "grad_norm": 1.1815138015719915,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 8960
+    },
+    {
+      "epoch": 0.08961,
+      "grad_norm": 1.0763278082170449,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 8961
+    },
+    {
+      "epoch": 0.08962,
+      "grad_norm": 1.1547543247106453,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 8962
+    },
+    {
+      "epoch": 0.08963,
+      "grad_norm": 0.9474596852663282,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 8963
+    },
+    {
+      "epoch": 0.08964,
+      "grad_norm": 0.9994796332550057,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 8964
+    },
+    {
+      "epoch": 0.08965,
+      "grad_norm": 1.0971047941927174,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 8965
+    },
+    {
+      "epoch": 0.08966,
+      "grad_norm": 0.9978749462040863,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 8966
+    },
+    {
+      "epoch": 0.08967,
+      "grad_norm": 1.091462772934639,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 8967
+    },
+    {
+      "epoch": 0.08968,
+      "grad_norm": 0.9928173167320807,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 8968
+    },
+    {
+      "epoch": 0.08969,
+      "grad_norm": 1.2786532555094048,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 8969
+    },
+    {
+      "epoch": 0.0897,
+      "grad_norm": 0.9304812924425294,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 8970
+    },
+    {
+      "epoch": 0.08971,
+      "grad_norm": 1.029273303406794,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 8971
+    },
+    {
+      "epoch": 0.08972,
+      "grad_norm": 1.1442621209111814,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 8972
+    },
+    {
+      "epoch": 0.08973,
+      "grad_norm": 1.0166706811028725,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 8973
+    },
+    {
+      "epoch": 0.08974,
+      "grad_norm": 1.3160111354791268,
+      "learning_rate": 0.003,
+      "loss": 4.0599,
+      "step": 8974
+    },
+    {
+      "epoch": 0.08975,
+      "grad_norm": 0.9964973979910691,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 8975
+    },
+    {
+      "epoch": 0.08976,
+      "grad_norm": 1.1506428299487088,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 8976
+    },
+    {
+      "epoch": 0.08977,
+      "grad_norm": 1.0330018963705825,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 8977
+    },
+    {
+      "epoch": 0.08978,
+      "grad_norm": 1.0887492374364582,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 8978
+    },
+    {
+      "epoch": 0.08979,
+      "grad_norm": 1.1346166358786325,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 8979
+    },
+    {
+      "epoch": 0.0898,
+      "grad_norm": 1.21619016375306,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 8980
+    },
+    {
+      "epoch": 0.08981,
+      "grad_norm": 0.9851538869152199,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 8981
+    },
+    {
+      "epoch": 0.08982,
+      "grad_norm": 1.2126645439081598,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 8982
+    },
+    {
+      "epoch": 0.08983,
+      "grad_norm": 1.0447731466887387,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 8983
+    },
+    {
+      "epoch": 0.08984,
+      "grad_norm": 1.1497594183247177,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 8984
+    },
+    {
+      "epoch": 0.08985,
+      "grad_norm": 1.2483832316844437,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 8985
+    },
+    {
+      "epoch": 0.08986,
+      "grad_norm": 1.2191689592626789,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 8986
+    },
+    {
+      "epoch": 0.08987,
+      "grad_norm": 1.076658986801492,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 8987
+    },
+    {
+      "epoch": 0.08988,
+      "grad_norm": 1.2479820133910688,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 8988
+    },
+    {
+      "epoch": 0.08989,
+      "grad_norm": 0.9034809767704942,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 8989
+    },
+    {
+      "epoch": 0.0899,
+      "grad_norm": 1.1404867965095085,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 8990
+    },
+    {
+      "epoch": 0.08991,
+      "grad_norm": 1.073000683119538,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 8991
+    },
+    {
+      "epoch": 0.08992,
+      "grad_norm": 1.2207306198770145,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 8992
+    },
+    {
+      "epoch": 0.08993,
+      "grad_norm": 1.0842166692368216,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 8993
+    },
+    {
+      "epoch": 0.08994,
+      "grad_norm": 1.0045211383021964,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 8994
+    },
+    {
+      "epoch": 0.08995,
+      "grad_norm": 1.316656487044687,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 8995
+    },
+    {
+      "epoch": 0.08996,
+      "grad_norm": 0.9870905235403588,
+      "learning_rate": 0.003,
+      "loss": 4.063,
+      "step": 8996
+    },
+    {
+      "epoch": 0.08997,
+      "grad_norm": 1.1131944813902874,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 8997
+    },
+    {
+      "epoch": 0.08998,
+      "grad_norm": 0.9575251947968014,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 8998
+    },
+    {
+      "epoch": 0.08999,
+      "grad_norm": 1.079155634238572,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 8999
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 1.3073657014522342,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 9000
+    },
+    {
+      "epoch": 0.09001,
+      "grad_norm": 0.9878617942632868,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 9001
+    },
+    {
+      "epoch": 0.09002,
+      "grad_norm": 1.4475702739335359,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 9002
+    },
+    {
+      "epoch": 0.09003,
+      "grad_norm": 1.0308588085625798,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 9003
+    },
+    {
+      "epoch": 0.09004,
+      "grad_norm": 1.221089293392804,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 9004
+    },
+    {
+      "epoch": 0.09005,
+      "grad_norm": 1.0231796835565534,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 9005
+    },
+    {
+      "epoch": 0.09006,
+      "grad_norm": 1.0417871456423533,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 9006
+    },
+    {
+      "epoch": 0.09007,
+      "grad_norm": 1.1191814093275658,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 9007
+    },
+    {
+      "epoch": 0.09008,
+      "grad_norm": 1.1348119020549952,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 9008
+    },
+    {
+      "epoch": 0.09009,
+      "grad_norm": 1.1633559345033404,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 9009
+    },
+    {
+      "epoch": 0.0901,
+      "grad_norm": 1.1458799262789814,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 9010
+    },
+    {
+      "epoch": 0.09011,
+      "grad_norm": 1.0636561316185342,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 9011
+    },
+    {
+      "epoch": 0.09012,
+      "grad_norm": 1.1477528884261912,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 9012
+    },
+    {
+      "epoch": 0.09013,
+      "grad_norm": 1.0045238113278767,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 9013
+    },
+    {
+      "epoch": 0.09014,
+      "grad_norm": 1.1254428234025435,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 9014
+    },
+    {
+      "epoch": 0.09015,
+      "grad_norm": 1.1052529228915646,
+      "learning_rate": 0.003,
+      "loss": 4.0764,
+      "step": 9015
+    },
+    {
+      "epoch": 0.09016,
+      "grad_norm": 1.0143709342989635,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 9016
+    },
+    {
+      "epoch": 0.09017,
+      "grad_norm": 1.0995658886429716,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 9017
+    },
+    {
+      "epoch": 0.09018,
+      "grad_norm": 1.0029225082537132,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 9018
+    },
+    {
+      "epoch": 0.09019,
+      "grad_norm": 1.3935530723752598,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 9019
+    },
+    {
+      "epoch": 0.0902,
+      "grad_norm": 0.9681733776693029,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 9020
+    },
+    {
+      "epoch": 0.09021,
+      "grad_norm": 1.3687701542155535,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 9021
+    },
+    {
+      "epoch": 0.09022,
+      "grad_norm": 0.9332102242731483,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 9022
+    },
+    {
+      "epoch": 0.09023,
+      "grad_norm": 1.0483328137620929,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 9023
+    },
+    {
+      "epoch": 0.09024,
+      "grad_norm": 1.1150169635102571,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 9024
+    },
+    {
+      "epoch": 0.09025,
+      "grad_norm": 1.2002769998310732,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 9025
+    },
+    {
+      "epoch": 0.09026,
+      "grad_norm": 1.2011034969162588,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 9026
+    },
+    {
+      "epoch": 0.09027,
+      "grad_norm": 1.146578370959906,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 9027
+    },
+    {
+      "epoch": 0.09028,
+      "grad_norm": 1.2228846480536117,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 9028
+    },
+    {
+      "epoch": 0.09029,
+      "grad_norm": 1.037224821411794,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 9029
+    },
+    {
+      "epoch": 0.0903,
+      "grad_norm": 0.9844125119573323,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 9030
+    },
+    {
+      "epoch": 0.09031,
+      "grad_norm": 1.1495286013814523,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 9031
+    },
+    {
+      "epoch": 0.09032,
+      "grad_norm": 1.1742914278318812,
+      "learning_rate": 0.003,
+      "loss": 4.0736,
+      "step": 9032
+    },
+    {
+      "epoch": 0.09033,
+      "grad_norm": 1.3904421332901014,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 9033
+    },
+    {
+      "epoch": 0.09034,
+      "grad_norm": 1.0278814470733673,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 9034
+    },
+    {
+      "epoch": 0.09035,
+      "grad_norm": 1.1229673655711763,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 9035
+    },
+    {
+      "epoch": 0.09036,
+      "grad_norm": 0.9056875520752794,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 9036
+    },
+    {
+      "epoch": 0.09037,
+      "grad_norm": 1.1006054177092766,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 9037
+    },
+    {
+      "epoch": 0.09038,
+      "grad_norm": 1.1348621040993052,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 9038
+    },
+    {
+      "epoch": 0.09039,
+      "grad_norm": 1.05381794931122,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 9039
+    },
+    {
+      "epoch": 0.0904,
+      "grad_norm": 1.0109885109102685,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 9040
+    },
+    {
+      "epoch": 0.09041,
+      "grad_norm": 1.123512949286327,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 9041
+    },
+    {
+      "epoch": 0.09042,
+      "grad_norm": 1.0917985051699848,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 9042
+    },
+    {
+      "epoch": 0.09043,
+      "grad_norm": 1.084941390137943,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 9043
+    },
+    {
+      "epoch": 0.09044,
+      "grad_norm": 1.12950639714153,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 9044
+    },
+    {
+      "epoch": 0.09045,
+      "grad_norm": 0.9435189761788078,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 9045
+    },
+    {
+      "epoch": 0.09046,
+      "grad_norm": 1.1205216961300428,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 9046
+    },
+    {
+      "epoch": 0.09047,
+      "grad_norm": 1.1101023596152269,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 9047
+    },
+    {
+      "epoch": 0.09048,
+      "grad_norm": 1.4711338068553876,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 9048
+    },
+    {
+      "epoch": 0.09049,
+      "grad_norm": 0.7958927509366164,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 9049
+    },
+    {
+      "epoch": 0.0905,
+      "grad_norm": 0.8137554584809007,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 9050
+    },
+    {
+      "epoch": 0.09051,
+      "grad_norm": 0.9025372956999882,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 9051
+    },
+    {
+      "epoch": 0.09052,
+      "grad_norm": 0.8871827348097391,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 9052
+    },
+    {
+      "epoch": 0.09053,
+      "grad_norm": 1.0593382418378432,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 9053
+    },
+    {
+      "epoch": 0.09054,
+      "grad_norm": 1.185043130945484,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 9054
+    },
+    {
+      "epoch": 0.09055,
+      "grad_norm": 1.123987691026757,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 9055
+    },
+    {
+      "epoch": 0.09056,
+      "grad_norm": 1.0852908797471614,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 9056
+    },
+    {
+      "epoch": 0.09057,
+      "grad_norm": 0.9184881520876066,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 9057
+    },
+    {
+      "epoch": 0.09058,
+      "grad_norm": 0.8821076958966486,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 9058
+    },
+    {
+      "epoch": 0.09059,
+      "grad_norm": 0.9653080945803115,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 9059
+    },
+    {
+      "epoch": 0.0906,
+      "grad_norm": 1.157246811985134,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 9060
+    },
+    {
+      "epoch": 0.09061,
+      "grad_norm": 1.106358395927521,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 9061
+    },
+    {
+      "epoch": 0.09062,
+      "grad_norm": 1.1308906707844582,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 9062
+    },
+    {
+      "epoch": 0.09063,
+      "grad_norm": 1.3615134157248625,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 9063
+    },
+    {
+      "epoch": 0.09064,
+      "grad_norm": 0.9632769176971355,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 9064
+    },
+    {
+      "epoch": 0.09065,
+      "grad_norm": 1.193055845335768,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 9065
+    },
+    {
+      "epoch": 0.09066,
+      "grad_norm": 1.0217847841679015,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 9066
+    },
+    {
+      "epoch": 0.09067,
+      "grad_norm": 1.2708025422078024,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 9067
+    },
+    {
+      "epoch": 0.09068,
+      "grad_norm": 0.999091796379082,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 9068
+    },
+    {
+      "epoch": 0.09069,
+      "grad_norm": 1.1937745015052195,
+      "learning_rate": 0.003,
+      "loss": 4.0863,
+      "step": 9069
+    },
+    {
+      "epoch": 0.0907,
+      "grad_norm": 1.2267119910601545,
+      "learning_rate": 0.003,
+      "loss": 4.0665,
+      "step": 9070
+    },
+    {
+      "epoch": 0.09071,
+      "grad_norm": 0.9307012025024503,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 9071
+    },
+    {
+      "epoch": 0.09072,
+      "grad_norm": 1.0537322841979997,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 9072
+    },
+    {
+      "epoch": 0.09073,
+      "grad_norm": 1.2016861411462987,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 9073
+    },
+    {
+      "epoch": 0.09074,
+      "grad_norm": 0.8496237355918259,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 9074
+    },
+    {
+      "epoch": 0.09075,
+      "grad_norm": 0.9908939634552881,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 9075
+    },
+    {
+      "epoch": 0.09076,
+      "grad_norm": 1.1968746368032812,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 9076
+    },
+    {
+      "epoch": 0.09077,
+      "grad_norm": 0.970351786121277,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 9077
+    },
+    {
+      "epoch": 0.09078,
+      "grad_norm": 1.1497763383817403,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 9078
+    },
+    {
+      "epoch": 0.09079,
+      "grad_norm": 1.0830961717463246,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 9079
+    },
+    {
+      "epoch": 0.0908,
+      "grad_norm": 0.9609418328913767,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 9080
+    },
+    {
+      "epoch": 0.09081,
+      "grad_norm": 1.1000854820573318,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 9081
+    },
+    {
+      "epoch": 0.09082,
+      "grad_norm": 1.1636952655382402,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 9082
+    },
+    {
+      "epoch": 0.09083,
+      "grad_norm": 1.1420961957835405,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 9083
+    },
+    {
+      "epoch": 0.09084,
+      "grad_norm": 1.0553826184749373,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 9084
+    },
+    {
+      "epoch": 0.09085,
+      "grad_norm": 1.0255413615584303,
+      "learning_rate": 0.003,
+      "loss": 4.0524,
+      "step": 9085
+    },
+    {
+      "epoch": 0.09086,
+      "grad_norm": 1.215452301963506,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 9086
+    },
+    {
+      "epoch": 0.09087,
+      "grad_norm": 0.9583487655084203,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 9087
+    },
+    {
+      "epoch": 0.09088,
+      "grad_norm": 1.232091628428844,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 9088
+    },
+    {
+      "epoch": 0.09089,
+      "grad_norm": 1.1137319600932658,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 9089
+    },
+    {
+      "epoch": 0.0909,
+      "grad_norm": 1.144755645285606,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 9090
+    },
+    {
+      "epoch": 0.09091,
+      "grad_norm": 1.0300971496763878,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 9091
+    },
+    {
+      "epoch": 0.09092,
+      "grad_norm": 1.0100476409907833,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 9092
+    },
+    {
+      "epoch": 0.09093,
+      "grad_norm": 1.1494952695266714,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 9093
+    },
+    {
+      "epoch": 0.09094,
+      "grad_norm": 1.0095676827058278,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 9094
+    },
+    {
+      "epoch": 0.09095,
+      "grad_norm": 1.2093302711410523,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 9095
+    },
+    {
+      "epoch": 0.09096,
+      "grad_norm": 0.976858163705672,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 9096
+    },
+    {
+      "epoch": 0.09097,
+      "grad_norm": 1.3853264618148677,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 9097
+    },
+    {
+      "epoch": 0.09098,
+      "grad_norm": 1.0766280332726217,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 9098
+    },
+    {
+      "epoch": 0.09099,
+      "grad_norm": 1.1136120589991296,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 9099
+    },
+    {
+      "epoch": 0.091,
+      "grad_norm": 1.1601021677903895,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 9100
+    },
+    {
+      "epoch": 0.09101,
+      "grad_norm": 1.3417195527068442,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 9101
+    },
+    {
+      "epoch": 0.09102,
+      "grad_norm": 0.9070301102163859,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 9102
+    },
+    {
+      "epoch": 0.09103,
+      "grad_norm": 0.9573335281707542,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 9103
+    },
+    {
+      "epoch": 0.09104,
+      "grad_norm": 1.0556076158321603,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 9104
+    },
+    {
+      "epoch": 0.09105,
+      "grad_norm": 1.1979973249595275,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 9105
+    },
+    {
+      "epoch": 0.09106,
+      "grad_norm": 1.1589318950983944,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 9106
+    },
+    {
+      "epoch": 0.09107,
+      "grad_norm": 1.1814885851810335,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 9107
+    },
+    {
+      "epoch": 0.09108,
+      "grad_norm": 1.019986448191847,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 9108
+    },
+    {
+      "epoch": 0.09109,
+      "grad_norm": 1.057663996654355,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 9109
+    },
+    {
+      "epoch": 0.0911,
+      "grad_norm": 1.2256223665855766,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 9110
+    },
+    {
+      "epoch": 0.09111,
+      "grad_norm": 0.9461029364575244,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 9111
+    },
+    {
+      "epoch": 0.09112,
+      "grad_norm": 1.049429275804046,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 9112
+    },
+    {
+      "epoch": 0.09113,
+      "grad_norm": 1.031956399076363,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 9113
+    },
+    {
+      "epoch": 0.09114,
+      "grad_norm": 1.0174397153586041,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 9114
+    },
+    {
+      "epoch": 0.09115,
+      "grad_norm": 1.2745379192778026,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 9115
+    },
+    {
+      "epoch": 0.09116,
+      "grad_norm": 0.9837529257881423,
+      "learning_rate": 0.003,
+      "loss": 4.0733,
+      "step": 9116
+    },
+    {
+      "epoch": 0.09117,
+      "grad_norm": 1.1786562942934684,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 9117
+    },
+    {
+      "epoch": 0.09118,
+      "grad_norm": 1.0560853969394606,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 9118
+    },
+    {
+      "epoch": 0.09119,
+      "grad_norm": 1.0377188929161931,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 9119
+    },
+    {
+      "epoch": 0.0912,
+      "grad_norm": 1.1207049150632644,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 9120
+    },
+    {
+      "epoch": 0.09121,
+      "grad_norm": 1.0193731745757568,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 9121
+    },
+    {
+      "epoch": 0.09122,
+      "grad_norm": 1.2624197726748643,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 9122
+    },
+    {
+      "epoch": 0.09123,
+      "grad_norm": 1.072056196669728,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 9123
+    },
+    {
+      "epoch": 0.09124,
+      "grad_norm": 1.239441100721789,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 9124
+    },
+    {
+      "epoch": 0.09125,
+      "grad_norm": 1.083356005410815,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 9125
+    },
+    {
+      "epoch": 0.09126,
+      "grad_norm": 1.266014219278627,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 9126
+    },
+    {
+      "epoch": 0.09127,
+      "grad_norm": 0.8598933039930851,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 9127
+    },
+    {
+      "epoch": 0.09128,
+      "grad_norm": 0.89093357601972,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 9128
+    },
+    {
+      "epoch": 0.09129,
+      "grad_norm": 1.11080170345324,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 9129
+    },
+    {
+      "epoch": 0.0913,
+      "grad_norm": 1.1921225096267718,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 9130
+    },
+    {
+      "epoch": 0.09131,
+      "grad_norm": 0.9911325172513188,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 9131
+    },
+    {
+      "epoch": 0.09132,
+      "grad_norm": 1.210452388671294,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 9132
+    },
+    {
+      "epoch": 0.09133,
+      "grad_norm": 1.1006247067475978,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 9133
+    },
+    {
+      "epoch": 0.09134,
+      "grad_norm": 1.192482017694382,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 9134
+    },
+    {
+      "epoch": 0.09135,
+      "grad_norm": 1.050325724178467,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 9135
+    },
+    {
+      "epoch": 0.09136,
+      "grad_norm": 1.2143867555226864,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 9136
+    },
+    {
+      "epoch": 0.09137,
+      "grad_norm": 0.9760810890066407,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 9137
+    },
+    {
+      "epoch": 0.09138,
+      "grad_norm": 1.1888855045602282,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 9138
+    },
+    {
+      "epoch": 0.09139,
+      "grad_norm": 0.9681065989709288,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 9139
+    },
+    {
+      "epoch": 0.0914,
+      "grad_norm": 1.3038699850066278,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 9140
+    },
+    {
+      "epoch": 0.09141,
+      "grad_norm": 0.9119873610357526,
+      "learning_rate": 0.003,
+      "loss": 4.069,
+      "step": 9141
+    },
+    {
+      "epoch": 0.09142,
+      "grad_norm": 0.97753456917582,
+      "learning_rate": 0.003,
+      "loss": 4.0566,
+      "step": 9142
+    },
+    {
+      "epoch": 0.09143,
+      "grad_norm": 1.2011099485139207,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 9143
+    },
+    {
+      "epoch": 0.09144,
+      "grad_norm": 0.8653559838348887,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 9144
+    },
+    {
+      "epoch": 0.09145,
+      "grad_norm": 0.6804719677612727,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 9145
+    },
+    {
+      "epoch": 0.09146,
+      "grad_norm": 0.8281509343825025,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 9146
+    },
+    {
+      "epoch": 0.09147,
+      "grad_norm": 1.046932924628029,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 9147
+    },
+    {
+      "epoch": 0.09148,
+      "grad_norm": 0.9481937690438011,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 9148
+    },
+    {
+      "epoch": 0.09149,
+      "grad_norm": 1.1467040624244416,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 9149
+    },
+    {
+      "epoch": 0.0915,
+      "grad_norm": 1.2688544713513863,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 9150
+    },
+    {
+      "epoch": 0.09151,
+      "grad_norm": 0.9410854425311685,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 9151
+    },
+    {
+      "epoch": 0.09152,
+      "grad_norm": 1.0715331038388722,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 9152
+    },
+    {
+      "epoch": 0.09153,
+      "grad_norm": 1.011570665232525,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 9153
+    },
+    {
+      "epoch": 0.09154,
+      "grad_norm": 1.0783280541516371,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 9154
+    },
+    {
+      "epoch": 0.09155,
+      "grad_norm": 1.0855343965090731,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 9155
+    },
+    {
+      "epoch": 0.09156,
+      "grad_norm": 1.0564142389909517,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 9156
+    },
+    {
+      "epoch": 0.09157,
+      "grad_norm": 1.0945895233993794,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 9157
+    },
+    {
+      "epoch": 0.09158,
+      "grad_norm": 1.336449148127849,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 9158
+    },
+    {
+      "epoch": 0.09159,
+      "grad_norm": 0.991375084782573,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 9159
+    },
+    {
+      "epoch": 0.0916,
+      "grad_norm": 1.0591224251327636,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 9160
+    },
+    {
+      "epoch": 0.09161,
+      "grad_norm": 1.164704576534139,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 9161
+    },
+    {
+      "epoch": 0.09162,
+      "grad_norm": 0.9413438539519919,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 9162
+    },
+    {
+      "epoch": 0.09163,
+      "grad_norm": 0.9714944791493136,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 9163
+    },
+    {
+      "epoch": 0.09164,
+      "grad_norm": 1.0287059172603712,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 9164
+    },
+    {
+      "epoch": 0.09165,
+      "grad_norm": 1.1861303886211818,
+      "learning_rate": 0.003,
+      "loss": 4.0642,
+      "step": 9165
+    },
+    {
+      "epoch": 0.09166,
+      "grad_norm": 1.124122240608982,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 9166
+    },
+    {
+      "epoch": 0.09167,
+      "grad_norm": 1.1459093885614982,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 9167
+    },
+    {
+      "epoch": 0.09168,
+      "grad_norm": 1.119778165627705,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 9168
+    },
+    {
+      "epoch": 0.09169,
+      "grad_norm": 1.1111751785567634,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 9169
+    },
+    {
+      "epoch": 0.0917,
+      "grad_norm": 1.2226524744645328,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 9170
+    },
+    {
+      "epoch": 0.09171,
+      "grad_norm": 0.8656526632807879,
+      "learning_rate": 0.003,
+      "loss": 4.0597,
+      "step": 9171
+    },
+    {
+      "epoch": 0.09172,
+      "grad_norm": 1.099267531571083,
+      "learning_rate": 0.003,
+      "loss": 4.0649,
+      "step": 9172
+    },
+    {
+      "epoch": 0.09173,
+      "grad_norm": 1.1162934002071323,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 9173
+    },
+    {
+      "epoch": 0.09174,
+      "grad_norm": 1.1000518721341896,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 9174
+    },
+    {
+      "epoch": 0.09175,
+      "grad_norm": 1.267658430539927,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 9175
+    },
+    {
+      "epoch": 0.09176,
+      "grad_norm": 1.0571564301320642,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 9176
+    },
+    {
+      "epoch": 0.09177,
+      "grad_norm": 1.1069807499260333,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 9177
+    },
+    {
+      "epoch": 0.09178,
+      "grad_norm": 1.0241191541435508,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 9178
+    },
+    {
+      "epoch": 0.09179,
+      "grad_norm": 1.2215349660778567,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 9179
+    },
+    {
+      "epoch": 0.0918,
+      "grad_norm": 0.9063283480014228,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 9180
+    },
+    {
+      "epoch": 0.09181,
+      "grad_norm": 0.8608588303075517,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 9181
+    },
+    {
+      "epoch": 0.09182,
+      "grad_norm": 1.080015607354298,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 9182
+    },
+    {
+      "epoch": 0.09183,
+      "grad_norm": 1.187987053928514,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 9183
+    },
+    {
+      "epoch": 0.09184,
+      "grad_norm": 1.0370353123734928,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 9184
+    },
+    {
+      "epoch": 0.09185,
+      "grad_norm": 1.1348969491194556,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 9185
+    },
+    {
+      "epoch": 0.09186,
+      "grad_norm": 1.2036726155070432,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 9186
+    },
+    {
+      "epoch": 0.09187,
+      "grad_norm": 1.0871963616696243,
+      "learning_rate": 0.003,
+      "loss": 4.0681,
+      "step": 9187
+    },
+    {
+      "epoch": 0.09188,
+      "grad_norm": 1.1723509075908214,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 9188
+    },
+    {
+      "epoch": 0.09189,
+      "grad_norm": 1.1132456681131127,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 9189
+    },
+    {
+      "epoch": 0.0919,
+      "grad_norm": 1.044542911903216,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 9190
+    },
+    {
+      "epoch": 0.09191,
+      "grad_norm": 1.2056609382294916,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 9191
+    },
+    {
+      "epoch": 0.09192,
+      "grad_norm": 1.0206520860594175,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 9192
+    },
+    {
+      "epoch": 0.09193,
+      "grad_norm": 1.1522627595760395,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 9193
+    },
+    {
+      "epoch": 0.09194,
+      "grad_norm": 1.036571230879693,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 9194
+    },
+    {
+      "epoch": 0.09195,
+      "grad_norm": 1.156868074376868,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 9195
+    },
+    {
+      "epoch": 0.09196,
+      "grad_norm": 0.9632802843738769,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 9196
+    },
+    {
+      "epoch": 0.09197,
+      "grad_norm": 1.2366512084187664,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 9197
+    },
+    {
+      "epoch": 0.09198,
+      "grad_norm": 0.9807141780820184,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 9198
+    },
+    {
+      "epoch": 0.09199,
+      "grad_norm": 1.0704002219425641,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 9199
+    },
+    {
+      "epoch": 0.092,
+      "grad_norm": 1.1110145741263948,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 9200
+    },
+    {
+      "epoch": 0.09201,
+      "grad_norm": 1.090835930160533,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 9201
+    },
+    {
+      "epoch": 0.09202,
+      "grad_norm": 1.0317362468150382,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 9202
+    },
+    {
+      "epoch": 0.09203,
+      "grad_norm": 1.2267741986981442,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 9203
+    },
+    {
+      "epoch": 0.09204,
+      "grad_norm": 1.1325105553589676,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 9204
+    },
+    {
+      "epoch": 0.09205,
+      "grad_norm": 1.1638105186655754,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 9205
+    },
+    {
+      "epoch": 0.09206,
+      "grad_norm": 1.0692805133374446,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 9206
+    },
+    {
+      "epoch": 0.09207,
+      "grad_norm": 1.175551262927089,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 9207
+    },
+    {
+      "epoch": 0.09208,
+      "grad_norm": 1.0644580288738281,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 9208
+    },
+    {
+      "epoch": 0.09209,
+      "grad_norm": 0.9664436871225908,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 9209
+    },
+    {
+      "epoch": 0.0921,
+      "grad_norm": 1.1254680317199708,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 9210
+    },
+    {
+      "epoch": 0.09211,
+      "grad_norm": 1.200712594293563,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 9211
+    },
+    {
+      "epoch": 0.09212,
+      "grad_norm": 0.9683431414690981,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 9212
+    },
+    {
+      "epoch": 0.09213,
+      "grad_norm": 0.944968780879836,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 9213
+    },
+    {
+      "epoch": 0.09214,
+      "grad_norm": 1.0413526215293398,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 9214
+    },
+    {
+      "epoch": 0.09215,
+      "grad_norm": 1.1772555410236571,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 9215
+    },
+    {
+      "epoch": 0.09216,
+      "grad_norm": 0.9624083137396001,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 9216
+    },
+    {
+      "epoch": 0.09217,
+      "grad_norm": 1.0511031168385416,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 9217
+    },
+    {
+      "epoch": 0.09218,
+      "grad_norm": 1.3269317543115233,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 9218
+    },
+    {
+      "epoch": 0.09219,
+      "grad_norm": 0.9923317369891772,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 9219
+    },
+    {
+      "epoch": 0.0922,
+      "grad_norm": 1.073030298407734,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 9220
+    },
+    {
+      "epoch": 0.09221,
+      "grad_norm": 1.0446285156707587,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 9221
+    },
+    {
+      "epoch": 0.09222,
+      "grad_norm": 1.1434810207183548,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 9222
+    },
+    {
+      "epoch": 0.09223,
+      "grad_norm": 1.1025201940993825,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 9223
+    },
+    {
+      "epoch": 0.09224,
+      "grad_norm": 0.9566021361855169,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 9224
+    },
+    {
+      "epoch": 0.09225,
+      "grad_norm": 1.2323077112756355,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 9225
+    },
+    {
+      "epoch": 0.09226,
+      "grad_norm": 0.899900247722213,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 9226
+    },
+    {
+      "epoch": 0.09227,
+      "grad_norm": 1.0849893705736053,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 9227
+    },
+    {
+      "epoch": 0.09228,
+      "grad_norm": 1.2542611304862052,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 9228
+    },
+    {
+      "epoch": 0.09229,
+      "grad_norm": 0.8407437326641111,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 9229
+    },
+    {
+      "epoch": 0.0923,
+      "grad_norm": 1.158954671915461,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 9230
+    },
+    {
+      "epoch": 0.09231,
+      "grad_norm": 1.1704792802830555,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 9231
+    },
+    {
+      "epoch": 0.09232,
+      "grad_norm": 1.1029452862218423,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 9232
+    },
+    {
+      "epoch": 0.09233,
+      "grad_norm": 1.2002856333009415,
+      "learning_rate": 0.003,
+      "loss": 4.06,
+      "step": 9233
+    },
+    {
+      "epoch": 0.09234,
+      "grad_norm": 0.8750251942831692,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 9234
+    },
+    {
+      "epoch": 0.09235,
+      "grad_norm": 1.0716615659635735,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 9235
+    },
+    {
+      "epoch": 0.09236,
+      "grad_norm": 1.5813090203643494,
+      "learning_rate": 0.003,
+      "loss": 4.0795,
+      "step": 9236
+    },
+    {
+      "epoch": 0.09237,
+      "grad_norm": 1.0338774462879339,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 9237
+    },
+    {
+      "epoch": 0.09238,
+      "grad_norm": 1.135346462252927,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 9238
+    },
+    {
+      "epoch": 0.09239,
+      "grad_norm": 1.1394764093548577,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 9239
+    },
+    {
+      "epoch": 0.0924,
+      "grad_norm": 1.1428354439905772,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 9240
+    },
+    {
+      "epoch": 0.09241,
+      "grad_norm": 1.0203493481299601,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 9241
+    },
+    {
+      "epoch": 0.09242,
+      "grad_norm": 0.9451602651492341,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 9242
+    },
+    {
+      "epoch": 0.09243,
+      "grad_norm": 1.0754642967463526,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 9243
+    },
+    {
+      "epoch": 0.09244,
+      "grad_norm": 1.2105994420477935,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 9244
+    },
+    {
+      "epoch": 0.09245,
+      "grad_norm": 1.1177071678503647,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 9245
+    },
+    {
+      "epoch": 0.09246,
+      "grad_norm": 1.1262767459012595,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 9246
+    },
+    {
+      "epoch": 0.09247,
+      "grad_norm": 1.1101169385517686,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 9247
+    },
+    {
+      "epoch": 0.09248,
+      "grad_norm": 1.039107535835496,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 9248
+    },
+    {
+      "epoch": 0.09249,
+      "grad_norm": 1.051375380829021,
+      "learning_rate": 0.003,
+      "loss": 4.0832,
+      "step": 9249
+    },
+    {
+      "epoch": 0.0925,
+      "grad_norm": 1.0818027535404036,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 9250
+    },
+    {
+      "epoch": 0.09251,
+      "grad_norm": 1.21441854307002,
+      "learning_rate": 0.003,
+      "loss": 4.0564,
+      "step": 9251
+    },
+    {
+      "epoch": 0.09252,
+      "grad_norm": 0.9446590679589214,
+      "learning_rate": 0.003,
+      "loss": 4.0604,
+      "step": 9252
+    },
+    {
+      "epoch": 0.09253,
+      "grad_norm": 0.9688539640875207,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 9253
+    },
+    {
+      "epoch": 0.09254,
+      "grad_norm": 1.1291024219182868,
+      "learning_rate": 0.003,
+      "loss": 4.0566,
+      "step": 9254
+    },
+    {
+      "epoch": 0.09255,
+      "grad_norm": 1.211862634432545,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 9255
+    },
+    {
+      "epoch": 0.09256,
+      "grad_norm": 1.2218560976542767,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 9256
+    },
+    {
+      "epoch": 0.09257,
+      "grad_norm": 1.0643221369589915,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 9257
+    },
+    {
+      "epoch": 0.09258,
+      "grad_norm": 1.1527043491418376,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 9258
+    },
+    {
+      "epoch": 0.09259,
+      "grad_norm": 0.9926788558086477,
+      "learning_rate": 0.003,
+      "loss": 4.0492,
+      "step": 9259
+    },
+    {
+      "epoch": 0.0926,
+      "grad_norm": 1.0433542223940273,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 9260
+    },
+    {
+      "epoch": 0.09261,
+      "grad_norm": 1.207804917706118,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 9261
+    },
+    {
+      "epoch": 0.09262,
+      "grad_norm": 1.1324209587635743,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 9262
+    },
+    {
+      "epoch": 0.09263,
+      "grad_norm": 1.172369074851875,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 9263
+    },
+    {
+      "epoch": 0.09264,
+      "grad_norm": 1.1682667843729466,
+      "learning_rate": 0.003,
+      "loss": 4.073,
+      "step": 9264
+    },
+    {
+      "epoch": 0.09265,
+      "grad_norm": 1.253090916375967,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 9265
+    },
+    {
+      "epoch": 0.09266,
+      "grad_norm": 1.1781887620358225,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 9266
+    },
+    {
+      "epoch": 0.09267,
+      "grad_norm": 1.0284374738702509,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 9267
+    },
+    {
+      "epoch": 0.09268,
+      "grad_norm": 1.1665787828723402,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 9268
+    },
+    {
+      "epoch": 0.09269,
+      "grad_norm": 1.1264162171369585,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 9269
+    },
+    {
+      "epoch": 0.0927,
+      "grad_norm": 1.1949132258262425,
+      "learning_rate": 0.003,
+      "loss": 4.0636,
+      "step": 9270
+    },
+    {
+      "epoch": 0.09271,
+      "grad_norm": 0.9696066482483353,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 9271
+    },
+    {
+      "epoch": 0.09272,
+      "grad_norm": 1.0301396773994387,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 9272
+    },
+    {
+      "epoch": 0.09273,
+      "grad_norm": 1.0893059409180028,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 9273
+    },
+    {
+      "epoch": 0.09274,
+      "grad_norm": 1.0947192726893542,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 9274
+    },
+    {
+      "epoch": 0.09275,
+      "grad_norm": 1.0723410183602133,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 9275
+    },
+    {
+      "epoch": 0.09276,
+      "grad_norm": 1.1691696906210551,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 9276
+    },
+    {
+      "epoch": 0.09277,
+      "grad_norm": 1.0647822469371042,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 9277
+    },
+    {
+      "epoch": 0.09278,
+      "grad_norm": 1.2167469789912233,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 9278
+    },
+    {
+      "epoch": 0.09279,
+      "grad_norm": 0.9896695614661197,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 9279
+    },
+    {
+      "epoch": 0.0928,
+      "grad_norm": 1.3634170937343293,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 9280
+    },
+    {
+      "epoch": 0.09281,
+      "grad_norm": 0.8620980137769152,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 9281
+    },
+    {
+      "epoch": 0.09282,
+      "grad_norm": 1.0427330391887681,
+      "learning_rate": 0.003,
+      "loss": 4.069,
+      "step": 9282
+    },
+    {
+      "epoch": 0.09283,
+      "grad_norm": 1.1710140561731026,
+      "learning_rate": 0.003,
+      "loss": 4.0652,
+      "step": 9283
+    },
+    {
+      "epoch": 0.09284,
+      "grad_norm": 1.4120083965462074,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 9284
+    },
+    {
+      "epoch": 0.09285,
+      "grad_norm": 1.0005243949124853,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 9285
+    },
+    {
+      "epoch": 0.09286,
+      "grad_norm": 1.2076261862310649,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 9286
+    },
+    {
+      "epoch": 0.09287,
+      "grad_norm": 0.9611404585275731,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 9287
+    },
+    {
+      "epoch": 0.09288,
+      "grad_norm": 0.9896962192199752,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 9288
+    },
+    {
+      "epoch": 0.09289,
+      "grad_norm": 1.1918897636488415,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 9289
+    },
+    {
+      "epoch": 0.0929,
+      "grad_norm": 1.100511638945172,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 9290
+    },
+    {
+      "epoch": 0.09291,
+      "grad_norm": 0.9953764094766955,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 9291
+    },
+    {
+      "epoch": 0.09292,
+      "grad_norm": 0.9663852236515597,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 9292
+    },
+    {
+      "epoch": 0.09293,
+      "grad_norm": 1.0635777370604826,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 9293
+    },
+    {
+      "epoch": 0.09294,
+      "grad_norm": 1.2456008375763008,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 9294
+    },
+    {
+      "epoch": 0.09295,
+      "grad_norm": 0.8653819364739406,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 9295
+    },
+    {
+      "epoch": 0.09296,
+      "grad_norm": 1.1680264346725173,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 9296
+    },
+    {
+      "epoch": 0.09297,
+      "grad_norm": 1.1901508293685317,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 9297
+    },
+    {
+      "epoch": 0.09298,
+      "grad_norm": 1.0915606863010314,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 9298
+    },
+    {
+      "epoch": 0.09299,
+      "grad_norm": 1.1592108148961227,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 9299
+    },
+    {
+      "epoch": 0.093,
+      "grad_norm": 1.0708244671033804,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 9300
+    },
+    {
+      "epoch": 0.09301,
+      "grad_norm": 1.1033798817954517,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 9301
+    },
+    {
+      "epoch": 0.09302,
+      "grad_norm": 1.0088796578839236,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 9302
+    },
+    {
+      "epoch": 0.09303,
+      "grad_norm": 1.2858796878215408,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 9303
+    },
+    {
+      "epoch": 0.09304,
+      "grad_norm": 1.2506744639450122,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 9304
+    },
+    {
+      "epoch": 0.09305,
+      "grad_norm": 0.9961386724217615,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 9305
+    },
+    {
+      "epoch": 0.09306,
+      "grad_norm": 1.2149212174491897,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 9306
+    },
+    {
+      "epoch": 0.09307,
+      "grad_norm": 0.9136578619084738,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 9307
+    },
+    {
+      "epoch": 0.09308,
+      "grad_norm": 0.9588826151722423,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 9308
+    },
+    {
+      "epoch": 0.09309,
+      "grad_norm": 0.957182512469483,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 9309
+    },
+    {
+      "epoch": 0.0931,
+      "grad_norm": 1.2935022678548451,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 9310
+    },
+    {
+      "epoch": 0.09311,
+      "grad_norm": 0.9790928444088314,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 9311
+    },
+    {
+      "epoch": 0.09312,
+      "grad_norm": 1.150857565224749,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 9312
+    },
+    {
+      "epoch": 0.09313,
+      "grad_norm": 1.075107605301709,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 9313
+    },
+    {
+      "epoch": 0.09314,
+      "grad_norm": 1.1182712741118235,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 9314
+    },
+    {
+      "epoch": 0.09315,
+      "grad_norm": 1.2529463174245064,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 9315
+    },
+    {
+      "epoch": 0.09316,
+      "grad_norm": 0.9771774475983117,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 9316
+    },
+    {
+      "epoch": 0.09317,
+      "grad_norm": 1.3760618473803496,
+      "learning_rate": 0.003,
+      "loss": 4.0644,
+      "step": 9317
+    },
+    {
+      "epoch": 0.09318,
+      "grad_norm": 0.7429498285526682,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 9318
+    },
+    {
+      "epoch": 0.09319,
+      "grad_norm": 0.9792252701782946,
+      "learning_rate": 0.003,
+      "loss": 4.0663,
+      "step": 9319
+    },
+    {
+      "epoch": 0.0932,
+      "grad_norm": 1.4975639708650046,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 9320
+    },
+    {
+      "epoch": 0.09321,
+      "grad_norm": 0.8902587907079172,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 9321
+    },
+    {
+      "epoch": 0.09322,
+      "grad_norm": 0.9093500134430933,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 9322
+    },
+    {
+      "epoch": 0.09323,
+      "grad_norm": 1.0993796211404194,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 9323
+    },
+    {
+      "epoch": 0.09324,
+      "grad_norm": 1.160680899752396,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 9324
+    },
+    {
+      "epoch": 0.09325,
+      "grad_norm": 0.9385567636610579,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 9325
+    },
+    {
+      "epoch": 0.09326,
+      "grad_norm": 0.9621843602737219,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 9326
+    },
+    {
+      "epoch": 0.09327,
+      "grad_norm": 1.2283368544933888,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 9327
+    },
+    {
+      "epoch": 0.09328,
+      "grad_norm": 1.138118155085037,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 9328
+    },
+    {
+      "epoch": 0.09329,
+      "grad_norm": 1.3360636810202806,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 9329
+    },
+    {
+      "epoch": 0.0933,
+      "grad_norm": 0.997995204861989,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 9330
+    },
+    {
+      "epoch": 0.09331,
+      "grad_norm": 1.1201516817051664,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 9331
+    },
+    {
+      "epoch": 0.09332,
+      "grad_norm": 1.204525418131745,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 9332
+    },
+    {
+      "epoch": 0.09333,
+      "grad_norm": 1.0575312400096248,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 9333
+    },
+    {
+      "epoch": 0.09334,
+      "grad_norm": 1.2701572053232315,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 9334
+    },
+    {
+      "epoch": 0.09335,
+      "grad_norm": 1.1585842605178802,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 9335
+    },
+    {
+      "epoch": 0.09336,
+      "grad_norm": 1.0311564658307473,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 9336
+    },
+    {
+      "epoch": 0.09337,
+      "grad_norm": 0.9367345134695763,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 9337
+    },
+    {
+      "epoch": 0.09338,
+      "grad_norm": 0.9632251089955324,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 9338
+    },
+    {
+      "epoch": 0.09339,
+      "grad_norm": 1.0383186326297353,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 9339
+    },
+    {
+      "epoch": 0.0934,
+      "grad_norm": 1.086317533403553,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 9340
+    },
+    {
+      "epoch": 0.09341,
+      "grad_norm": 1.066313152390867,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 9341
+    },
+    {
+      "epoch": 0.09342,
+      "grad_norm": 1.2845490767261778,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 9342
+    },
+    {
+      "epoch": 0.09343,
+      "grad_norm": 1.200116094561525,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 9343
+    },
+    {
+      "epoch": 0.09344,
+      "grad_norm": 1.1152409513420605,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 9344
+    },
+    {
+      "epoch": 0.09345,
+      "grad_norm": 1.1107749483966356,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 9345
+    },
+    {
+      "epoch": 0.09346,
+      "grad_norm": 1.1283130554525544,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 9346
+    },
+    {
+      "epoch": 0.09347,
+      "grad_norm": 1.0736955106456132,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 9347
+    },
+    {
+      "epoch": 0.09348,
+      "grad_norm": 1.0736643024406574,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 9348
+    },
+    {
+      "epoch": 0.09349,
+      "grad_norm": 1.1989810783719919,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 9349
+    },
+    {
+      "epoch": 0.0935,
+      "grad_norm": 1.0740480563955976,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 9350
+    },
+    {
+      "epoch": 0.09351,
+      "grad_norm": 0.9996894673701714,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 9351
+    },
+    {
+      "epoch": 0.09352,
+      "grad_norm": 1.2492290188517632,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 9352
+    },
+    {
+      "epoch": 0.09353,
+      "grad_norm": 1.0984353858675837,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 9353
+    },
+    {
+      "epoch": 0.09354,
+      "grad_norm": 1.0494227900731494,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 9354
+    },
+    {
+      "epoch": 0.09355,
+      "grad_norm": 1.0887626536039114,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 9355
+    },
+    {
+      "epoch": 0.09356,
+      "grad_norm": 1.1063458069116743,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 9356
+    },
+    {
+      "epoch": 0.09357,
+      "grad_norm": 1.1772620726746457,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 9357
+    },
+    {
+      "epoch": 0.09358,
+      "grad_norm": 0.8349208876816591,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 9358
+    },
+    {
+      "epoch": 0.09359,
+      "grad_norm": 0.8649691894837728,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 9359
+    },
+    {
+      "epoch": 0.0936,
+      "grad_norm": 0.9462358195573604,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 9360
+    },
+    {
+      "epoch": 0.09361,
+      "grad_norm": 1.233886976766547,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 9361
+    },
+    {
+      "epoch": 0.09362,
+      "grad_norm": 1.2087100800252286,
+      "learning_rate": 0.003,
+      "loss": 4.0754,
+      "step": 9362
+    },
+    {
+      "epoch": 0.09363,
+      "grad_norm": 0.9584661416221599,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 9363
+    },
+    {
+      "epoch": 0.09364,
+      "grad_norm": 1.3630115362152988,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 9364
+    },
+    {
+      "epoch": 0.09365,
+      "grad_norm": 1.0115250367617443,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 9365
+    },
+    {
+      "epoch": 0.09366,
+      "grad_norm": 1.410143423428131,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 9366
+    },
+    {
+      "epoch": 0.09367,
+      "grad_norm": 1.0026644890567558,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 9367
+    },
+    {
+      "epoch": 0.09368,
+      "grad_norm": 1.2102005850211326,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 9368
+    },
+    {
+      "epoch": 0.09369,
+      "grad_norm": 1.1309040815939808,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 9369
+    },
+    {
+      "epoch": 0.0937,
+      "grad_norm": 0.9973045348674824,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 9370
+    },
+    {
+      "epoch": 0.09371,
+      "grad_norm": 1.231370608565574,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 9371
+    },
+    {
+      "epoch": 0.09372,
+      "grad_norm": 1.0924523718241692,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 9372
+    },
+    {
+      "epoch": 0.09373,
+      "grad_norm": 1.0504664280616463,
+      "learning_rate": 0.003,
+      "loss": 4.0768,
+      "step": 9373
+    },
+    {
+      "epoch": 0.09374,
+      "grad_norm": 1.1272188425084024,
+      "learning_rate": 0.003,
+      "loss": 4.0787,
+      "step": 9374
+    },
+    {
+      "epoch": 0.09375,
+      "grad_norm": 1.1348460508047482,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 9375
+    },
+    {
+      "epoch": 0.09376,
+      "grad_norm": 1.1298959210406103,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 9376
+    },
+    {
+      "epoch": 0.09377,
+      "grad_norm": 1.186524901763047,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 9377
+    },
+    {
+      "epoch": 0.09378,
+      "grad_norm": 1.0314318127210171,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 9378
+    },
+    {
+      "epoch": 0.09379,
+      "grad_norm": 1.5680911633070103,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 9379
+    },
+    {
+      "epoch": 0.0938,
+      "grad_norm": 0.9013424550963559,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 9380
+    },
+    {
+      "epoch": 0.09381,
+      "grad_norm": 0.9574099783748187,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 9381
+    },
+    {
+      "epoch": 0.09382,
+      "grad_norm": 1.2086515825142305,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 9382
+    },
+    {
+      "epoch": 0.09383,
+      "grad_norm": 1.0350405673887666,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 9383
+    },
+    {
+      "epoch": 0.09384,
+      "grad_norm": 1.0680807378772312,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 9384
+    },
+    {
+      "epoch": 0.09385,
+      "grad_norm": 1.0961146064007061,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 9385
+    },
+    {
+      "epoch": 0.09386,
+      "grad_norm": 0.9545455608465853,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 9386
+    },
+    {
+      "epoch": 0.09387,
+      "grad_norm": 0.9765112894095569,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 9387
+    },
+    {
+      "epoch": 0.09388,
+      "grad_norm": 1.0782991672647486,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 9388
+    },
+    {
+      "epoch": 0.09389,
+      "grad_norm": 1.0731997920493421,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 9389
+    },
+    {
+      "epoch": 0.0939,
+      "grad_norm": 1.2364650097930605,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 9390
+    },
+    {
+      "epoch": 0.09391,
+      "grad_norm": 1.0663640185817809,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 9391
+    },
+    {
+      "epoch": 0.09392,
+      "grad_norm": 1.1877737611342285,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 9392
+    },
+    {
+      "epoch": 0.09393,
+      "grad_norm": 1.0955254115743633,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 9393
+    },
+    {
+      "epoch": 0.09394,
+      "grad_norm": 1.4092921791527784,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 9394
+    },
+    {
+      "epoch": 0.09395,
+      "grad_norm": 0.9358321234864352,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 9395
+    },
+    {
+      "epoch": 0.09396,
+      "grad_norm": 1.2594661199138193,
+      "learning_rate": 0.003,
+      "loss": 4.0742,
+      "step": 9396
+    },
+    {
+      "epoch": 0.09397,
+      "grad_norm": 0.9883524181089541,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 9397
+    },
+    {
+      "epoch": 0.09398,
+      "grad_norm": 1.0673063130537923,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 9398
+    },
+    {
+      "epoch": 0.09399,
+      "grad_norm": 1.1875700898134038,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 9399
+    },
+    {
+      "epoch": 0.094,
+      "grad_norm": 1.0418263510990502,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 9400
+    },
+    {
+      "epoch": 0.09401,
+      "grad_norm": 1.1642465528112664,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 9401
+    },
+    {
+      "epoch": 0.09402,
+      "grad_norm": 1.2786047972483314,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 9402
+    },
+    {
+      "epoch": 0.09403,
+      "grad_norm": 0.8618399669597347,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 9403
+    },
+    {
+      "epoch": 0.09404,
+      "grad_norm": 1.102847094348384,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 9404
+    },
+    {
+      "epoch": 0.09405,
+      "grad_norm": 1.2834464451783154,
+      "learning_rate": 0.003,
+      "loss": 4.0619,
+      "step": 9405
+    },
+    {
+      "epoch": 0.09406,
+      "grad_norm": 0.9871554834956486,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 9406
+    },
+    {
+      "epoch": 0.09407,
+      "grad_norm": 1.1610654126184679,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 9407
+    },
+    {
+      "epoch": 0.09408,
+      "grad_norm": 1.170917230941906,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 9408
+    },
+    {
+      "epoch": 0.09409,
+      "grad_norm": 1.1318228712154021,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 9409
+    },
+    {
+      "epoch": 0.0941,
+      "grad_norm": 1.1234390873721771,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 9410
+    },
+    {
+      "epoch": 0.09411,
+      "grad_norm": 0.8647601508974965,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 9411
+    },
+    {
+      "epoch": 0.09412,
+      "grad_norm": 1.0471840870012292,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 9412
+    },
+    {
+      "epoch": 0.09413,
+      "grad_norm": 1.2190310827593032,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 9413
+    },
+    {
+      "epoch": 0.09414,
+      "grad_norm": 0.8477403736289396,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 9414
+    },
+    {
+      "epoch": 0.09415,
+      "grad_norm": 0.9371919042158918,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 9415
+    },
+    {
+      "epoch": 0.09416,
+      "grad_norm": 1.1490720658755587,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 9416
+    },
+    {
+      "epoch": 0.09417,
+      "grad_norm": 1.1018851741635174,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 9417
+    },
+    {
+      "epoch": 0.09418,
+      "grad_norm": 1.2892671070061477,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 9418
+    },
+    {
+      "epoch": 0.09419,
+      "grad_norm": 1.1101253491606295,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 9419
+    },
+    {
+      "epoch": 0.0942,
+      "grad_norm": 1.3071470583049012,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 9420
+    },
+    {
+      "epoch": 0.09421,
+      "grad_norm": 0.863117611253664,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 9421
+    },
+    {
+      "epoch": 0.09422,
+      "grad_norm": 1.0738994904421413,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 9422
+    },
+    {
+      "epoch": 0.09423,
+      "grad_norm": 1.1740757656465912,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 9423
+    },
+    {
+      "epoch": 0.09424,
+      "grad_norm": 0.9999480734746848,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 9424
+    },
+    {
+      "epoch": 0.09425,
+      "grad_norm": 1.1947521810325221,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 9425
+    },
+    {
+      "epoch": 0.09426,
+      "grad_norm": 1.076053138054749,
+      "learning_rate": 0.003,
+      "loss": 4.0679,
+      "step": 9426
+    },
+    {
+      "epoch": 0.09427,
+      "grad_norm": 1.2539611359358045,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 9427
+    },
+    {
+      "epoch": 0.09428,
+      "grad_norm": 1.2042161531904478,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 9428
+    },
+    {
+      "epoch": 0.09429,
+      "grad_norm": 1.2520918442905509,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 9429
+    },
+    {
+      "epoch": 0.0943,
+      "grad_norm": 0.8090989376412021,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 9430
+    },
+    {
+      "epoch": 0.09431,
+      "grad_norm": 0.9501739667787066,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 9431
+    },
+    {
+      "epoch": 0.09432,
+      "grad_norm": 1.0564545187269092,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 9432
+    },
+    {
+      "epoch": 0.09433,
+      "grad_norm": 1.1105124478676938,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 9433
+    },
+    {
+      "epoch": 0.09434,
+      "grad_norm": 1.1365861267709172,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 9434
+    },
+    {
+      "epoch": 0.09435,
+      "grad_norm": 1.0626647857526774,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 9435
+    },
+    {
+      "epoch": 0.09436,
+      "grad_norm": 1.3048334209489638,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 9436
+    },
+    {
+      "epoch": 0.09437,
+      "grad_norm": 1.192258709260609,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 9437
+    },
+    {
+      "epoch": 0.09438,
+      "grad_norm": 1.141173762966999,
+      "learning_rate": 0.003,
+      "loss": 4.0567,
+      "step": 9438
+    },
+    {
+      "epoch": 0.09439,
+      "grad_norm": 0.9959914893100231,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 9439
+    },
+    {
+      "epoch": 0.0944,
+      "grad_norm": 1.1812893575099583,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 9440
+    },
+    {
+      "epoch": 0.09441,
+      "grad_norm": 0.9472244329470745,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 9441
+    },
+    {
+      "epoch": 0.09442,
+      "grad_norm": 1.1629963337515048,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 9442
+    },
+    {
+      "epoch": 0.09443,
+      "grad_norm": 0.8487426150195475,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 9443
+    },
+    {
+      "epoch": 0.09444,
+      "grad_norm": 0.9207154747911219,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 9444
+    },
+    {
+      "epoch": 0.09445,
+      "grad_norm": 1.115602354686498,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 9445
+    },
+    {
+      "epoch": 0.09446,
+      "grad_norm": 1.3343852809989445,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 9446
+    },
+    {
+      "epoch": 0.09447,
+      "grad_norm": 0.9863278032640667,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 9447
+    },
+    {
+      "epoch": 0.09448,
+      "grad_norm": 1.1763365235364014,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 9448
+    },
+    {
+      "epoch": 0.09449,
+      "grad_norm": 1.105703389589132,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 9449
+    },
+    {
+      "epoch": 0.0945,
+      "grad_norm": 1.01551147072131,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 9450
+    },
+    {
+      "epoch": 0.09451,
+      "grad_norm": 1.0383763855846733,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 9451
+    },
+    {
+      "epoch": 0.09452,
+      "grad_norm": 1.0456254521002109,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 9452
+    },
+    {
+      "epoch": 0.09453,
+      "grad_norm": 1.3691070796802907,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 9453
+    },
+    {
+      "epoch": 0.09454,
+      "grad_norm": 0.9155459999489047,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 9454
+    },
+    {
+      "epoch": 0.09455,
+      "grad_norm": 1.0062151085426871,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 9455
+    },
+    {
+      "epoch": 0.09456,
+      "grad_norm": 1.775196393766159,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 9456
+    },
+    {
+      "epoch": 0.09457,
+      "grad_norm": 0.8268537533403297,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 9457
+    },
+    {
+      "epoch": 0.09458,
+      "grad_norm": 0.9446756093027355,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 9458
+    },
+    {
+      "epoch": 0.09459,
+      "grad_norm": 1.2872893558697978,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 9459
+    },
+    {
+      "epoch": 0.0946,
+      "grad_norm": 1.0414735406519482,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 9460
+    },
+    {
+      "epoch": 0.09461,
+      "grad_norm": 1.2923952872869107,
+      "learning_rate": 0.003,
+      "loss": 4.0776,
+      "step": 9461
+    },
+    {
+      "epoch": 0.09462,
+      "grad_norm": 1.0982135647456706,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 9462
+    },
+    {
+      "epoch": 0.09463,
+      "grad_norm": 1.195985441134548,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 9463
+    },
+    {
+      "epoch": 0.09464,
+      "grad_norm": 1.094216310472828,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 9464
+    },
+    {
+      "epoch": 0.09465,
+      "grad_norm": 1.1344022479380613,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 9465
+    },
+    {
+      "epoch": 0.09466,
+      "grad_norm": 1.1649272025527642,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 9466
+    },
+    {
+      "epoch": 0.09467,
+      "grad_norm": 1.1437197614588612,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 9467
+    },
+    {
+      "epoch": 0.09468,
+      "grad_norm": 1.134510156394556,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 9468
+    },
+    {
+      "epoch": 0.09469,
+      "grad_norm": 1.1456897293699813,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 9469
+    },
+    {
+      "epoch": 0.0947,
+      "grad_norm": 0.9370646196560669,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 9470
+    },
+    {
+      "epoch": 0.09471,
+      "grad_norm": 1.1605962963311105,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 9471
+    },
+    {
+      "epoch": 0.09472,
+      "grad_norm": 1.0220332905341656,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 9472
+    },
+    {
+      "epoch": 0.09473,
+      "grad_norm": 1.3119106941290712,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 9473
+    },
+    {
+      "epoch": 0.09474,
+      "grad_norm": 1.0696197454349894,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 9474
+    },
+    {
+      "epoch": 0.09475,
+      "grad_norm": 1.183557234231492,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 9475
+    },
+    {
+      "epoch": 0.09476,
+      "grad_norm": 0.936355154816999,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 9476
+    },
+    {
+      "epoch": 0.09477,
+      "grad_norm": 1.0826345363697292,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 9477
+    },
+    {
+      "epoch": 0.09478,
+      "grad_norm": 1.2268375814601145,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 9478
+    },
+    {
+      "epoch": 0.09479,
+      "grad_norm": 0.9077481557382702,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 9479
+    },
+    {
+      "epoch": 0.0948,
+      "grad_norm": 1.1577127173889232,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 9480
+    },
+    {
+      "epoch": 0.09481,
+      "grad_norm": 1.164940507783553,
+      "learning_rate": 0.003,
+      "loss": 4.0643,
+      "step": 9481
+    },
+    {
+      "epoch": 0.09482,
+      "grad_norm": 1.0376186580785653,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 9482
+    },
+    {
+      "epoch": 0.09483,
+      "grad_norm": 1.0668111334949657,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 9483
+    },
+    {
+      "epoch": 0.09484,
+      "grad_norm": 0.9544000956467396,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 9484
+    },
+    {
+      "epoch": 0.09485,
+      "grad_norm": 1.3563985618454755,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 9485
+    },
+    {
+      "epoch": 0.09486,
+      "grad_norm": 0.9907132939829558,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 9486
+    },
+    {
+      "epoch": 0.09487,
+      "grad_norm": 1.140513002856383,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 9487
+    },
+    {
+      "epoch": 0.09488,
+      "grad_norm": 1.2992557947553103,
+      "learning_rate": 0.003,
+      "loss": 4.0848,
+      "step": 9488
+    },
+    {
+      "epoch": 0.09489,
+      "grad_norm": 0.9910288087212725,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 9489
+    },
+    {
+      "epoch": 0.0949,
+      "grad_norm": 1.281341022406897,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 9490
+    },
+    {
+      "epoch": 0.09491,
+      "grad_norm": 1.0178254732217256,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 9491
+    },
+    {
+      "epoch": 0.09492,
+      "grad_norm": 1.0599592630101493,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 9492
+    },
+    {
+      "epoch": 0.09493,
+      "grad_norm": 1.0707052087473112,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 9493
+    },
+    {
+      "epoch": 0.09494,
+      "grad_norm": 1.184811169391914,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 9494
+    },
+    {
+      "epoch": 0.09495,
+      "grad_norm": 1.1050306365464493,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 9495
+    },
+    {
+      "epoch": 0.09496,
+      "grad_norm": 1.060159425305823,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 9496
+    },
+    {
+      "epoch": 0.09497,
+      "grad_norm": 1.0825941688780625,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 9497
+    },
+    {
+      "epoch": 0.09498,
+      "grad_norm": 0.9601571426627881,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 9498
+    },
+    {
+      "epoch": 0.09499,
+      "grad_norm": 1.0590750166016245,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 9499
+    },
+    {
+      "epoch": 0.095,
+      "grad_norm": 1.2786584082845458,
+      "learning_rate": 0.003,
+      "loss": 4.0735,
+      "step": 9500
+    },
+    {
+      "epoch": 0.09501,
+      "grad_norm": 1.1290326342601904,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 9501
+    },
+    {
+      "epoch": 0.09502,
+      "grad_norm": 0.9912457750329704,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 9502
+    },
+    {
+      "epoch": 0.09503,
+      "grad_norm": 1.152658098138242,
+      "learning_rate": 0.003,
+      "loss": 3.9938,
+      "step": 9503
+    },
+    {
+      "epoch": 0.09504,
+      "grad_norm": 1.0919690729103646,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 9504
+    },
+    {
+      "epoch": 0.09505,
+      "grad_norm": 1.1519487718748778,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 9505
+    },
+    {
+      "epoch": 0.09506,
+      "grad_norm": 1.262006574899147,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 9506
+    },
+    {
+      "epoch": 0.09507,
+      "grad_norm": 1.0711373212381228,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 9507
+    },
+    {
+      "epoch": 0.09508,
+      "grad_norm": 1.1239192938184404,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 9508
+    },
+    {
+      "epoch": 0.09509,
+      "grad_norm": 1.3389515718167913,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 9509
+    },
+    {
+      "epoch": 0.0951,
+      "grad_norm": 0.8958060198263552,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 9510
+    },
+    {
+      "epoch": 0.09511,
+      "grad_norm": 1.0719956101779906,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 9511
+    },
+    {
+      "epoch": 0.09512,
+      "grad_norm": 1.305626481744099,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 9512
+    },
+    {
+      "epoch": 0.09513,
+      "grad_norm": 0.9176719274352512,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 9513
+    },
+    {
+      "epoch": 0.09514,
+      "grad_norm": 0.9499238409052401,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 9514
+    },
+    {
+      "epoch": 0.09515,
+      "grad_norm": 1.063622459645422,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 9515
+    },
+    {
+      "epoch": 0.09516,
+      "grad_norm": 1.1652384446120476,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 9516
+    },
+    {
+      "epoch": 0.09517,
+      "grad_norm": 1.178061476075233,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 9517
+    },
+    {
+      "epoch": 0.09518,
+      "grad_norm": 1.3678980044323894,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 9518
+    },
+    {
+      "epoch": 0.09519,
+      "grad_norm": 1.046217095028117,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 9519
+    },
+    {
+      "epoch": 0.0952,
+      "grad_norm": 1.1818557546526918,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 9520
+    },
+    {
+      "epoch": 0.09521,
+      "grad_norm": 1.1304356171833927,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 9521
+    },
+    {
+      "epoch": 0.09522,
+      "grad_norm": 1.120019603471895,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 9522
+    },
+    {
+      "epoch": 0.09523,
+      "grad_norm": 1.104879448193623,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 9523
+    },
+    {
+      "epoch": 0.09524,
+      "grad_norm": 0.873368086596089,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 9524
+    },
+    {
+      "epoch": 0.09525,
+      "grad_norm": 0.9961985148368994,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 9525
+    },
+    {
+      "epoch": 0.09526,
+      "grad_norm": 1.2676375654165073,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 9526
+    },
+    {
+      "epoch": 0.09527,
+      "grad_norm": 1.1529943279598172,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 9527
+    },
+    {
+      "epoch": 0.09528,
+      "grad_norm": 1.131678617939542,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 9528
+    },
+    {
+      "epoch": 0.09529,
+      "grad_norm": 1.0147106663564123,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 9529
+    },
+    {
+      "epoch": 0.0953,
+      "grad_norm": 1.2878762896528653,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 9530
+    },
+    {
+      "epoch": 0.09531,
+      "grad_norm": 1.0096287930269905,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 9531
+    },
+    {
+      "epoch": 0.09532,
+      "grad_norm": 1.278489534695974,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 9532
+    },
+    {
+      "epoch": 0.09533,
+      "grad_norm": 1.068314907618256,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 9533
+    },
+    {
+      "epoch": 0.09534,
+      "grad_norm": 1.0541105462925286,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 9534
+    },
+    {
+      "epoch": 0.09535,
+      "grad_norm": 1.0542416896874893,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 9535
+    },
+    {
+      "epoch": 0.09536,
+      "grad_norm": 1.203990932473719,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 9536
+    },
+    {
+      "epoch": 0.09537,
+      "grad_norm": 1.0609650874854466,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 9537
+    },
+    {
+      "epoch": 0.09538,
+      "grad_norm": 1.0566099485690075,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 9538
+    },
+    {
+      "epoch": 0.09539,
+      "grad_norm": 1.1711495828610736,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 9539
+    },
+    {
+      "epoch": 0.0954,
+      "grad_norm": 1.0370146448146371,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 9540
+    },
+    {
+      "epoch": 0.09541,
+      "grad_norm": 1.0159923428799353,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 9541
+    },
+    {
+      "epoch": 0.09542,
+      "grad_norm": 1.1293172537885205,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 9542
+    },
+    {
+      "epoch": 0.09543,
+      "grad_norm": 1.0450117640491212,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 9543
+    },
+    {
+      "epoch": 0.09544,
+      "grad_norm": 1.1603566723882397,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 9544
+    },
+    {
+      "epoch": 0.09545,
+      "grad_norm": 1.0447933808065626,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 9545
+    },
+    {
+      "epoch": 0.09546,
+      "grad_norm": 0.9458484110007981,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 9546
+    },
+    {
+      "epoch": 0.09547,
+      "grad_norm": 1.0876570915875363,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 9547
+    },
+    {
+      "epoch": 0.09548,
+      "grad_norm": 1.1875507966969954,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 9548
+    },
+    {
+      "epoch": 0.09549,
+      "grad_norm": 0.8814604626466929,
+      "learning_rate": 0.003,
+      "loss": 4.063,
+      "step": 9549
+    },
+    {
+      "epoch": 0.0955,
+      "grad_norm": 1.0246586128840154,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 9550
+    },
+    {
+      "epoch": 0.09551,
+      "grad_norm": 1.2387660010338577,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 9551
+    },
+    {
+      "epoch": 0.09552,
+      "grad_norm": 0.9754198125538579,
+      "learning_rate": 0.003,
+      "loss": 4.0826,
+      "step": 9552
+    },
+    {
+      "epoch": 0.09553,
+      "grad_norm": 1.1577414898591938,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 9553
+    },
+    {
+      "epoch": 0.09554,
+      "grad_norm": 1.0570088924704193,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 9554
+    },
+    {
+      "epoch": 0.09555,
+      "grad_norm": 1.2106521594790243,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 9555
+    },
+    {
+      "epoch": 0.09556,
+      "grad_norm": 1.0733450373625477,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 9556
+    },
+    {
+      "epoch": 0.09557,
+      "grad_norm": 1.1919871564102347,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 9557
+    },
+    {
+      "epoch": 0.09558,
+      "grad_norm": 1.146997350654266,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 9558
+    },
+    {
+      "epoch": 0.09559,
+      "grad_norm": 1.168927128415256,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 9559
+    },
+    {
+      "epoch": 0.0956,
+      "grad_norm": 1.11230556582203,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 9560
+    },
+    {
+      "epoch": 0.09561,
+      "grad_norm": 1.0881575661118905,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 9561
+    },
+    {
+      "epoch": 0.09562,
+      "grad_norm": 1.3389741513893663,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 9562
+    },
+    {
+      "epoch": 0.09563,
+      "grad_norm": 1.1779078269635281,
+      "learning_rate": 0.003,
+      "loss": 4.0709,
+      "step": 9563
+    },
+    {
+      "epoch": 0.09564,
+      "grad_norm": 1.2633874376703453,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 9564
+    },
+    {
+      "epoch": 0.09565,
+      "grad_norm": 0.8461819044779642,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 9565
+    },
+    {
+      "epoch": 0.09566,
+      "grad_norm": 0.9430171254625663,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 9566
+    },
+    {
+      "epoch": 0.09567,
+      "grad_norm": 1.0543925802682974,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 9567
+    },
+    {
+      "epoch": 0.09568,
+      "grad_norm": 1.2147511761418746,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 9568
+    },
+    {
+      "epoch": 0.09569,
+      "grad_norm": 1.139812511049965,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 9569
+    },
+    {
+      "epoch": 0.0957,
+      "grad_norm": 1.2066618266931726,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 9570
+    },
+    {
+      "epoch": 0.09571,
+      "grad_norm": 0.9969088728201041,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 9571
+    },
+    {
+      "epoch": 0.09572,
+      "grad_norm": 1.521564608038327,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 9572
+    },
+    {
+      "epoch": 0.09573,
+      "grad_norm": 0.767016893061818,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 9573
+    },
+    {
+      "epoch": 0.09574,
+      "grad_norm": 0.9077964583623702,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 9574
+    },
+    {
+      "epoch": 0.09575,
+      "grad_norm": 0.9789511548929908,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 9575
+    },
+    {
+      "epoch": 0.09576,
+      "grad_norm": 1.1272087126973815,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 9576
+    },
+    {
+      "epoch": 0.09577,
+      "grad_norm": 1.2010384957358553,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 9577
+    },
+    {
+      "epoch": 0.09578,
+      "grad_norm": 1.1758915609790765,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 9578
+    },
+    {
+      "epoch": 0.09579,
+      "grad_norm": 1.1677535297765045,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 9579
+    },
+    {
+      "epoch": 0.0958,
+      "grad_norm": 1.1752106864410679,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 9580
+    },
+    {
+      "epoch": 0.09581,
+      "grad_norm": 1.0390685668953878,
+      "learning_rate": 0.003,
+      "loss": 4.0799,
+      "step": 9581
+    },
+    {
+      "epoch": 0.09582,
+      "grad_norm": 0.9448991333037483,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 9582
+    },
+    {
+      "epoch": 0.09583,
+      "grad_norm": 1.0699012101296939,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 9583
+    },
+    {
+      "epoch": 0.09584,
+      "grad_norm": 1.1114200647456693,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 9584
+    },
+    {
+      "epoch": 0.09585,
+      "grad_norm": 1.1110522535404246,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 9585
+    },
+    {
+      "epoch": 0.09586,
+      "grad_norm": 1.0693682178529131,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 9586
+    },
+    {
+      "epoch": 0.09587,
+      "grad_norm": 1.0907022200921574,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 9587
+    },
+    {
+      "epoch": 0.09588,
+      "grad_norm": 1.3052673040023657,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 9588
+    },
+    {
+      "epoch": 0.09589,
+      "grad_norm": 1.1273974285859623,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 9589
+    },
+    {
+      "epoch": 0.0959,
+      "grad_norm": 0.9575377458395997,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 9590
+    },
+    {
+      "epoch": 0.09591,
+      "grad_norm": 1.2408320836823072,
+      "learning_rate": 0.003,
+      "loss": 4.0813,
+      "step": 9591
+    },
+    {
+      "epoch": 0.09592,
+      "grad_norm": 1.0230179861054507,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 9592
+    },
+    {
+      "epoch": 0.09593,
+      "grad_norm": 1.2007397484572089,
+      "learning_rate": 0.003,
+      "loss": 4.0598,
+      "step": 9593
+    },
+    {
+      "epoch": 0.09594,
+      "grad_norm": 0.9589259749600783,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 9594
+    },
+    {
+      "epoch": 0.09595,
+      "grad_norm": 1.0812271670511917,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 9595
+    },
+    {
+      "epoch": 0.09596,
+      "grad_norm": 1.215276423755771,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 9596
+    },
+    {
+      "epoch": 0.09597,
+      "grad_norm": 1.055514060891504,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 9597
+    },
+    {
+      "epoch": 0.09598,
+      "grad_norm": 1.0847458195628048,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 9598
+    },
+    {
+      "epoch": 0.09599,
+      "grad_norm": 1.3389087793122878,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 9599
+    },
+    {
+      "epoch": 0.096,
+      "grad_norm": 0.9055383982261854,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 9600
+    },
+    {
+      "epoch": 0.09601,
+      "grad_norm": 0.9811940695731026,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 9601
+    },
+    {
+      "epoch": 0.09602,
+      "grad_norm": 1.0789527786902395,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 9602
+    },
+    {
+      "epoch": 0.09603,
+      "grad_norm": 1.1727478141730425,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 9603
+    },
+    {
+      "epoch": 0.09604,
+      "grad_norm": 1.023524788242411,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 9604
+    },
+    {
+      "epoch": 0.09605,
+      "grad_norm": 1.2858580097981922,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 9605
+    },
+    {
+      "epoch": 0.09606,
+      "grad_norm": 0.8845738717710074,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 9606
+    },
+    {
+      "epoch": 0.09607,
+      "grad_norm": 0.9417694564288732,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 9607
+    },
+    {
+      "epoch": 0.09608,
+      "grad_norm": 1.328516456493561,
+      "learning_rate": 0.003,
+      "loss": 4.0717,
+      "step": 9608
+    },
+    {
+      "epoch": 0.09609,
+      "grad_norm": 1.2629011203498186,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 9609
+    },
+    {
+      "epoch": 0.0961,
+      "grad_norm": 1.1729883232988318,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 9610
+    },
+    {
+      "epoch": 0.09611,
+      "grad_norm": 1.393118460283264,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 9611
+    },
+    {
+      "epoch": 0.09612,
+      "grad_norm": 1.0698096910097765,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 9612
+    },
+    {
+      "epoch": 0.09613,
+      "grad_norm": 1.007696738311522,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 9613
+    },
+    {
+      "epoch": 0.09614,
+      "grad_norm": 1.4803986138018956,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 9614
+    },
+    {
+      "epoch": 0.09615,
+      "grad_norm": 1.0559026629823391,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 9615
+    },
+    {
+      "epoch": 0.09616,
+      "grad_norm": 1.1782897391468559,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 9616
+    },
+    {
+      "epoch": 0.09617,
+      "grad_norm": 1.0127744559386622,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 9617
+    },
+    {
+      "epoch": 0.09618,
+      "grad_norm": 1.2681610401673449,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 9618
+    },
+    {
+      "epoch": 0.09619,
+      "grad_norm": 0.9294594817912581,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 9619
+    },
+    {
+      "epoch": 0.0962,
+      "grad_norm": 0.9905671912694861,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 9620
+    },
+    {
+      "epoch": 0.09621,
+      "grad_norm": 1.3373852398607728,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 9621
+    },
+    {
+      "epoch": 0.09622,
+      "grad_norm": 1.1541351242021758,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 9622
+    },
+    {
+      "epoch": 0.09623,
+      "grad_norm": 1.112301616615713,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 9623
+    },
+    {
+      "epoch": 0.09624,
+      "grad_norm": 1.3430937174739355,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 9624
+    },
+    {
+      "epoch": 0.09625,
+      "grad_norm": 1.080047687143114,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 9625
+    },
+    {
+      "epoch": 0.09626,
+      "grad_norm": 1.0694079912518233,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 9626
+    },
+    {
+      "epoch": 0.09627,
+      "grad_norm": 1.139432118757624,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 9627
+    },
+    {
+      "epoch": 0.09628,
+      "grad_norm": 0.958627843592087,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 9628
+    },
+    {
+      "epoch": 0.09629,
+      "grad_norm": 1.0662067967300446,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 9629
+    },
+    {
+      "epoch": 0.0963,
+      "grad_norm": 1.0412238973539465,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 9630
+    },
+    {
+      "epoch": 0.09631,
+      "grad_norm": 1.0125046957428312,
+      "learning_rate": 0.003,
+      "loss": 4.0658,
+      "step": 9631
+    },
+    {
+      "epoch": 0.09632,
+      "grad_norm": 1.3555424752709917,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 9632
+    },
+    {
+      "epoch": 0.09633,
+      "grad_norm": 1.063757740872899,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 9633
+    },
+    {
+      "epoch": 0.09634,
+      "grad_norm": 1.20136927846276,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 9634
+    },
+    {
+      "epoch": 0.09635,
+      "grad_norm": 1.1423655529057926,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 9635
+    },
+    {
+      "epoch": 0.09636,
+      "grad_norm": 1.3277021817787018,
+      "learning_rate": 0.003,
+      "loss": 4.0606,
+      "step": 9636
+    },
+    {
+      "epoch": 0.09637,
+      "grad_norm": 0.916236476287278,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 9637
+    },
+    {
+      "epoch": 0.09638,
+      "grad_norm": 1.1712812625089448,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 9638
+    },
+    {
+      "epoch": 0.09639,
+      "grad_norm": 1.1016378675492433,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 9639
+    },
+    {
+      "epoch": 0.0964,
+      "grad_norm": 1.0341626312817283,
+      "learning_rate": 0.003,
+      "loss": 4.0674,
+      "step": 9640
+    },
+    {
+      "epoch": 0.09641,
+      "grad_norm": 1.1660677439533391,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 9641
+    },
+    {
+      "epoch": 0.09642,
+      "grad_norm": 0.9807879097387026,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 9642
+    },
+    {
+      "epoch": 0.09643,
+      "grad_norm": 1.2769541125682597,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 9643
+    },
+    {
+      "epoch": 0.09644,
+      "grad_norm": 0.9284721817809056,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 9644
+    },
+    {
+      "epoch": 0.09645,
+      "grad_norm": 1.2692913801981347,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 9645
+    },
+    {
+      "epoch": 0.09646,
+      "grad_norm": 0.9226038318267903,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 9646
+    },
+    {
+      "epoch": 0.09647,
+      "grad_norm": 1.1716846999519674,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 9647
+    },
+    {
+      "epoch": 0.09648,
+      "grad_norm": 1.0984624781628638,
+      "learning_rate": 0.003,
+      "loss": 4.0797,
+      "step": 9648
+    },
+    {
+      "epoch": 0.09649,
+      "grad_norm": 1.032713451324886,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 9649
+    },
+    {
+      "epoch": 0.0965,
+      "grad_norm": 1.2893129243430874,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 9650
+    },
+    {
+      "epoch": 0.09651,
+      "grad_norm": 1.2914240519466096,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 9651
+    },
+    {
+      "epoch": 0.09652,
+      "grad_norm": 1.0304038397431154,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 9652
+    },
+    {
+      "epoch": 0.09653,
+      "grad_norm": 1.1558865874046644,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 9653
+    },
+    {
+      "epoch": 0.09654,
+      "grad_norm": 1.0769712394558766,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 9654
+    },
+    {
+      "epoch": 0.09655,
+      "grad_norm": 1.0046687966997174,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 9655
+    },
+    {
+      "epoch": 0.09656,
+      "grad_norm": 1.208629121263327,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 9656
+    },
+    {
+      "epoch": 0.09657,
+      "grad_norm": 1.0524663213041456,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 9657
+    },
+    {
+      "epoch": 0.09658,
+      "grad_norm": 1.1446885277300836,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 9658
+    },
+    {
+      "epoch": 0.09659,
+      "grad_norm": 1.1799030583312893,
+      "learning_rate": 0.003,
+      "loss": 4.0627,
+      "step": 9659
+    },
+    {
+      "epoch": 0.0966,
+      "grad_norm": 1.1396175467308287,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 9660
+    },
+    {
+      "epoch": 0.09661,
+      "grad_norm": 1.2031365613861558,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 9661
+    },
+    {
+      "epoch": 0.09662,
+      "grad_norm": 1.0450126941262252,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 9662
+    },
+    {
+      "epoch": 0.09663,
+      "grad_norm": 1.1872140736780354,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 9663
+    },
+    {
+      "epoch": 0.09664,
+      "grad_norm": 1.10816766795799,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 9664
+    },
+    {
+      "epoch": 0.09665,
+      "grad_norm": 1.0920304192442827,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 9665
+    },
+    {
+      "epoch": 0.09666,
+      "grad_norm": 1.0614448386527346,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 9666
+    },
+    {
+      "epoch": 0.09667,
+      "grad_norm": 1.222612568204848,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 9667
+    },
+    {
+      "epoch": 0.09668,
+      "grad_norm": 1.0545430433180965,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 9668
+    },
+    {
+      "epoch": 0.09669,
+      "grad_norm": 1.0956783811426631,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 9669
+    },
+    {
+      "epoch": 0.0967,
+      "grad_norm": 0.9610892180277101,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 9670
+    },
+    {
+      "epoch": 0.09671,
+      "grad_norm": 1.36766207674131,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 9671
+    },
+    {
+      "epoch": 0.09672,
+      "grad_norm": 0.9365680632039836,
+      "learning_rate": 0.003,
+      "loss": 4.0558,
+      "step": 9672
+    },
+    {
+      "epoch": 0.09673,
+      "grad_norm": 1.0029650441296576,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 9673
+    },
+    {
+      "epoch": 0.09674,
+      "grad_norm": 1.1463824116993444,
+      "learning_rate": 0.003,
+      "loss": 4.0862,
+      "step": 9674
+    },
+    {
+      "epoch": 0.09675,
+      "grad_norm": 1.058735123681513,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 9675
+    },
+    {
+      "epoch": 0.09676,
+      "grad_norm": 1.0766125342365733,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 9676
+    },
+    {
+      "epoch": 0.09677,
+      "grad_norm": 1.1203720989393562,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 9677
+    },
+    {
+      "epoch": 0.09678,
+      "grad_norm": 1.0951111296319187,
+      "learning_rate": 0.003,
+      "loss": 4.0651,
+      "step": 9678
+    },
+    {
+      "epoch": 0.09679,
+      "grad_norm": 1.0249113062787711,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 9679
+    },
+    {
+      "epoch": 0.0968,
+      "grad_norm": 1.057027117497527,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 9680
+    },
+    {
+      "epoch": 0.09681,
+      "grad_norm": 1.2583816959197518,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 9681
+    },
+    {
+      "epoch": 0.09682,
+      "grad_norm": 1.2002373803491195,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 9682
+    },
+    {
+      "epoch": 0.09683,
+      "grad_norm": 0.9521536845012109,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 9683
+    },
+    {
+      "epoch": 0.09684,
+      "grad_norm": 1.176715675692431,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 9684
+    },
+    {
+      "epoch": 0.09685,
+      "grad_norm": 1.2536000297265582,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 9685
+    },
+    {
+      "epoch": 0.09686,
+      "grad_norm": 1.2657578935031644,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 9686
+    },
+    {
+      "epoch": 0.09687,
+      "grad_norm": 1.045426925577382,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 9687
+    },
+    {
+      "epoch": 0.09688,
+      "grad_norm": 1.1541282081642033,
+      "learning_rate": 0.003,
+      "loss": 4.0731,
+      "step": 9688
+    },
+    {
+      "epoch": 0.09689,
+      "grad_norm": 1.0905131544606357,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 9689
+    },
+    {
+      "epoch": 0.0969,
+      "grad_norm": 1.2174044997009796,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 9690
+    },
+    {
+      "epoch": 0.09691,
+      "grad_norm": 0.9988922116041921,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 9691
+    },
+    {
+      "epoch": 0.09692,
+      "grad_norm": 1.425363077395206,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 9692
+    },
+    {
+      "epoch": 0.09693,
+      "grad_norm": 0.9614291217845489,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 9693
+    },
+    {
+      "epoch": 0.09694,
+      "grad_norm": 1.1263744616665738,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 9694
+    },
+    {
+      "epoch": 0.09695,
+      "grad_norm": 0.9927369724604576,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 9695
+    },
+    {
+      "epoch": 0.09696,
+      "grad_norm": 1.204799634186295,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 9696
+    },
+    {
+      "epoch": 0.09697,
+      "grad_norm": 1.1019920635375835,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 9697
+    },
+    {
+      "epoch": 0.09698,
+      "grad_norm": 1.3083890446725224,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 9698
+    },
+    {
+      "epoch": 0.09699,
+      "grad_norm": 0.8673207887854278,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 9699
+    },
+    {
+      "epoch": 0.097,
+      "grad_norm": 1.0937709937276758,
+      "learning_rate": 0.003,
+      "loss": 4.0585,
+      "step": 9700
+    },
+    {
+      "epoch": 0.09701,
+      "grad_norm": 1.1096124083859713,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 9701
+    },
+    {
+      "epoch": 0.09702,
+      "grad_norm": 1.0470332399648397,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 9702
+    },
+    {
+      "epoch": 0.09703,
+      "grad_norm": 1.116033751774405,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 9703
+    },
+    {
+      "epoch": 0.09704,
+      "grad_norm": 1.322938797815828,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 9704
+    },
+    {
+      "epoch": 0.09705,
+      "grad_norm": 1.1674145052754212,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 9705
+    },
+    {
+      "epoch": 0.09706,
+      "grad_norm": 1.080731872467136,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 9706
+    },
+    {
+      "epoch": 0.09707,
+      "grad_norm": 1.0930211810900559,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 9707
+    },
+    {
+      "epoch": 0.09708,
+      "grad_norm": 1.1779520488029591,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 9708
+    },
+    {
+      "epoch": 0.09709,
+      "grad_norm": 1.1874834288052714,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 9709
+    },
+    {
+      "epoch": 0.0971,
+      "grad_norm": 1.070110334968628,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 9710
+    },
+    {
+      "epoch": 0.09711,
+      "grad_norm": 0.9584275425436,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 9711
+    },
+    {
+      "epoch": 0.09712,
+      "grad_norm": 1.2305623246651165,
+      "learning_rate": 0.003,
+      "loss": 4.0779,
+      "step": 9712
+    },
+    {
+      "epoch": 0.09713,
+      "grad_norm": 1.0142149218985719,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 9713
+    },
+    {
+      "epoch": 0.09714,
+      "grad_norm": 1.0721750280658653,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 9714
+    },
+    {
+      "epoch": 0.09715,
+      "grad_norm": 1.0682520259093737,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 9715
+    },
+    {
+      "epoch": 0.09716,
+      "grad_norm": 1.0242254335123158,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 9716
+    },
+    {
+      "epoch": 0.09717,
+      "grad_norm": 1.1149187761150097,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 9717
+    },
+    {
+      "epoch": 0.09718,
+      "grad_norm": 0.9890961402875876,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 9718
+    },
+    {
+      "epoch": 0.09719,
+      "grad_norm": 1.0299139105443698,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 9719
+    },
+    {
+      "epoch": 0.0972,
+      "grad_norm": 1.1558417735093496,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 9720
+    },
+    {
+      "epoch": 0.09721,
+      "grad_norm": 0.9555209453551541,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 9721
+    },
+    {
+      "epoch": 0.09722,
+      "grad_norm": 1.1785724250641767,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 9722
+    },
+    {
+      "epoch": 0.09723,
+      "grad_norm": 0.9949897584530741,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 9723
+    },
+    {
+      "epoch": 0.09724,
+      "grad_norm": 1.2568210621473228,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 9724
+    },
+    {
+      "epoch": 0.09725,
+      "grad_norm": 1.332241847275785,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 9725
+    },
+    {
+      "epoch": 0.09726,
+      "grad_norm": 1.2144800862045018,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 9726
+    },
+    {
+      "epoch": 0.09727,
+      "grad_norm": 1.011363287246841,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 9727
+    },
+    {
+      "epoch": 0.09728,
+      "grad_norm": 1.2120085901786424,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 9728
+    },
+    {
+      "epoch": 0.09729,
+      "grad_norm": 0.867741516356202,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 9729
+    },
+    {
+      "epoch": 0.0973,
+      "grad_norm": 0.8878505496669983,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 9730
+    },
+    {
+      "epoch": 0.09731,
+      "grad_norm": 1.0837727849054648,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 9731
+    },
+    {
+      "epoch": 0.09732,
+      "grad_norm": 1.2212446559927213,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 9732
+    },
+    {
+      "epoch": 0.09733,
+      "grad_norm": 1.2393726295211063,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 9733
+    },
+    {
+      "epoch": 0.09734,
+      "grad_norm": 1.188214463071944,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 9734
+    },
+    {
+      "epoch": 0.09735,
+      "grad_norm": 0.9872370881140842,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 9735
+    },
+    {
+      "epoch": 0.09736,
+      "grad_norm": 1.2580153206408196,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 9736
+    },
+    {
+      "epoch": 0.09737,
+      "grad_norm": 0.8841844781969095,
+      "learning_rate": 0.003,
+      "loss": 3.9805,
+      "step": 9737
+    },
+    {
+      "epoch": 0.09738,
+      "grad_norm": 0.976812400102225,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 9738
+    },
+    {
+      "epoch": 0.09739,
+      "grad_norm": 1.1508306667326322,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 9739
+    },
+    {
+      "epoch": 0.0974,
+      "grad_norm": 1.196271566840404,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 9740
+    },
+    {
+      "epoch": 0.09741,
+      "grad_norm": 1.3082102267631772,
+      "learning_rate": 0.003,
+      "loss": 4.0845,
+      "step": 9741
+    },
+    {
+      "epoch": 0.09742,
+      "grad_norm": 0.8994765241690306,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 9742
+    },
+    {
+      "epoch": 0.09743,
+      "grad_norm": 1.1682198162883546,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 9743
+    },
+    {
+      "epoch": 0.09744,
+      "grad_norm": 1.1469753871425274,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 9744
+    },
+    {
+      "epoch": 0.09745,
+      "grad_norm": 1.0422496922123192,
+      "learning_rate": 0.003,
+      "loss": 4.0645,
+      "step": 9745
+    },
+    {
+      "epoch": 0.09746,
+      "grad_norm": 1.1368341501666346,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 9746
+    },
+    {
+      "epoch": 0.09747,
+      "grad_norm": 1.0608575439585237,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 9747
+    },
+    {
+      "epoch": 0.09748,
+      "grad_norm": 1.1013061298997167,
+      "learning_rate": 0.003,
+      "loss": 4.0687,
+      "step": 9748
+    },
+    {
+      "epoch": 0.09749,
+      "grad_norm": 1.0678436420733588,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 9749
+    },
+    {
+      "epoch": 0.0975,
+      "grad_norm": 1.2786918164008214,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 9750
+    },
+    {
+      "epoch": 0.09751,
+      "grad_norm": 1.28463107870462,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 9751
+    },
+    {
+      "epoch": 0.09752,
+      "grad_norm": 1.0428808856437923,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 9752
+    },
+    {
+      "epoch": 0.09753,
+      "grad_norm": 1.192666194400153,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 9753
+    },
+    {
+      "epoch": 0.09754,
+      "grad_norm": 0.9605862543799322,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 9754
+    },
+    {
+      "epoch": 0.09755,
+      "grad_norm": 0.9697232496137559,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 9755
+    },
+    {
+      "epoch": 0.09756,
+      "grad_norm": 1.224040574050268,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 9756
+    },
+    {
+      "epoch": 0.09757,
+      "grad_norm": 1.1631102520521532,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 9757
+    },
+    {
+      "epoch": 0.09758,
+      "grad_norm": 1.2505629557746405,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 9758
+    },
+    {
+      "epoch": 0.09759,
+      "grad_norm": 0.9191423900203707,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 9759
+    },
+    {
+      "epoch": 0.0976,
+      "grad_norm": 1.1424671614519222,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 9760
+    },
+    {
+      "epoch": 0.09761,
+      "grad_norm": 1.102658611997917,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 9761
+    },
+    {
+      "epoch": 0.09762,
+      "grad_norm": 1.1444051134380946,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 9762
+    },
+    {
+      "epoch": 0.09763,
+      "grad_norm": 0.9264161068395702,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 9763
+    },
+    {
+      "epoch": 0.09764,
+      "grad_norm": 1.1546408453955734,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 9764
+    },
+    {
+      "epoch": 0.09765,
+      "grad_norm": 1.0603790276325908,
+      "learning_rate": 0.003,
+      "loss": 4.0603,
+      "step": 9765
+    },
+    {
+      "epoch": 0.09766,
+      "grad_norm": 1.1751196941995863,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 9766
+    },
+    {
+      "epoch": 0.09767,
+      "grad_norm": 1.0177083794650912,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 9767
+    },
+    {
+      "epoch": 0.09768,
+      "grad_norm": 1.273731379095236,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 9768
+    },
+    {
+      "epoch": 0.09769,
+      "grad_norm": 1.0996220193558155,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 9769
+    },
+    {
+      "epoch": 0.0977,
+      "grad_norm": 1.0840911902806654,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 9770
+    },
+    {
+      "epoch": 0.09771,
+      "grad_norm": 1.2555935788729693,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 9771
+    },
+    {
+      "epoch": 0.09772,
+      "grad_norm": 1.0357209296089733,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 9772
+    },
+    {
+      "epoch": 0.09773,
+      "grad_norm": 1.1082243796424802,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 9773
+    },
+    {
+      "epoch": 0.09774,
+      "grad_norm": 1.1536368515267816,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 9774
+    },
+    {
+      "epoch": 0.09775,
+      "grad_norm": 1.1867161847229521,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 9775
+    },
+    {
+      "epoch": 0.09776,
+      "grad_norm": 0.9452459057379259,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 9776
+    },
+    {
+      "epoch": 0.09777,
+      "grad_norm": 1.302622907890426,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 9777
+    },
+    {
+      "epoch": 0.09778,
+      "grad_norm": 0.9532508643293588,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 9778
+    },
+    {
+      "epoch": 0.09779,
+      "grad_norm": 1.1242482678635257,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 9779
+    },
+    {
+      "epoch": 0.0978,
+      "grad_norm": 0.9839145612462165,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 9780
+    },
+    {
+      "epoch": 0.09781,
+      "grad_norm": 1.1498232001451654,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 9781
+    },
+    {
+      "epoch": 0.09782,
+      "grad_norm": 1.271609091807339,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 9782
+    },
+    {
+      "epoch": 0.09783,
+      "grad_norm": 1.058335316998838,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 9783
+    },
+    {
+      "epoch": 0.09784,
+      "grad_norm": 1.37604134703034,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 9784
+    },
+    {
+      "epoch": 0.09785,
+      "grad_norm": 1.1945900295219143,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 9785
+    },
+    {
+      "epoch": 0.09786,
+      "grad_norm": 1.1892035335129034,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 9786
+    },
+    {
+      "epoch": 0.09787,
+      "grad_norm": 0.9198710416706617,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 9787
+    },
+    {
+      "epoch": 0.09788,
+      "grad_norm": 1.0963105756254876,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 9788
+    },
+    {
+      "epoch": 0.09789,
+      "grad_norm": 1.0434621221623521,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 9789
+    },
+    {
+      "epoch": 0.0979,
+      "grad_norm": 1.1550849940577819,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 9790
+    },
+    {
+      "epoch": 0.09791,
+      "grad_norm": 1.357987097806882,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 9791
+    },
+    {
+      "epoch": 0.09792,
+      "grad_norm": 1.042044201245543,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 9792
+    },
+    {
+      "epoch": 0.09793,
+      "grad_norm": 1.1919512358344675,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 9793
+    },
+    {
+      "epoch": 0.09794,
+      "grad_norm": 1.2241595549926594,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 9794
+    },
+    {
+      "epoch": 0.09795,
+      "grad_norm": 1.0586631623737053,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 9795
+    },
+    {
+      "epoch": 0.09796,
+      "grad_norm": 1.1914802085441085,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 9796
+    },
+    {
+      "epoch": 0.09797,
+      "grad_norm": 1.1115787515349747,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 9797
+    },
+    {
+      "epoch": 0.09798,
+      "grad_norm": 0.9816749943466317,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 9798
+    },
+    {
+      "epoch": 0.09799,
+      "grad_norm": 1.1660406158959165,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 9799
+    },
+    {
+      "epoch": 0.098,
+      "grad_norm": 1.0709459229028373,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 9800
+    },
+    {
+      "epoch": 0.09801,
+      "grad_norm": 1.1990212956387598,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 9801
+    },
+    {
+      "epoch": 0.09802,
+      "grad_norm": 1.0567665044317882,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 9802
+    },
+    {
+      "epoch": 0.09803,
+      "grad_norm": 1.0778757992404968,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 9803
+    },
+    {
+      "epoch": 0.09804,
+      "grad_norm": 1.0525175964682807,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 9804
+    },
+    {
+      "epoch": 0.09805,
+      "grad_norm": 1.241338187112226,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 9805
+    },
+    {
+      "epoch": 0.09806,
+      "grad_norm": 1.0640857294444321,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 9806
+    },
+    {
+      "epoch": 0.09807,
+      "grad_norm": 1.1456311229706166,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 9807
+    },
+    {
+      "epoch": 0.09808,
+      "grad_norm": 1.1124715383770616,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 9808
+    },
+    {
+      "epoch": 0.09809,
+      "grad_norm": 1.0282435668770187,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 9809
+    },
+    {
+      "epoch": 0.0981,
+      "grad_norm": 1.0398795512010792,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 9810
+    },
+    {
+      "epoch": 0.09811,
+      "grad_norm": 0.9279075100819835,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 9811
+    },
+    {
+      "epoch": 0.09812,
+      "grad_norm": 1.0782186992787437,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 9812
+    },
+    {
+      "epoch": 0.09813,
+      "grad_norm": 1.3085300578739432,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 9813
+    },
+    {
+      "epoch": 0.09814,
+      "grad_norm": 1.1101588548323373,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 9814
+    },
+    {
+      "epoch": 0.09815,
+      "grad_norm": 1.1431391579353671,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 9815
+    },
+    {
+      "epoch": 0.09816,
+      "grad_norm": 1.164627516774537,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 9816
+    },
+    {
+      "epoch": 0.09817,
+      "grad_norm": 1.1241257621100056,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 9817
+    },
+    {
+      "epoch": 0.09818,
+      "grad_norm": 1.1105822418561284,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 9818
+    },
+    {
+      "epoch": 0.09819,
+      "grad_norm": 1.2260148961373039,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 9819
+    },
+    {
+      "epoch": 0.0982,
+      "grad_norm": 1.0867720780444274,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 9820
+    },
+    {
+      "epoch": 0.09821,
+      "grad_norm": 0.925875521249806,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 9821
+    },
+    {
+      "epoch": 0.09822,
+      "grad_norm": 1.4487529076721064,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 9822
+    },
+    {
+      "epoch": 0.09823,
+      "grad_norm": 0.8294595735585626,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 9823
+    },
+    {
+      "epoch": 0.09824,
+      "grad_norm": 1.073309157510469,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 9824
+    },
+    {
+      "epoch": 0.09825,
+      "grad_norm": 1.279144134720854,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 9825
+    },
+    {
+      "epoch": 0.09826,
+      "grad_norm": 1.1476433885186508,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 9826
+    },
+    {
+      "epoch": 0.09827,
+      "grad_norm": 1.043838880236461,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 9827
+    },
+    {
+      "epoch": 0.09828,
+      "grad_norm": 1.1968693614717298,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 9828
+    },
+    {
+      "epoch": 0.09829,
+      "grad_norm": 1.0457138129997414,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 9829
+    },
+    {
+      "epoch": 0.0983,
+      "grad_norm": 1.5123833188618667,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 9830
+    },
+    {
+      "epoch": 0.09831,
+      "grad_norm": 0.8711665475933555,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 9831
+    },
+    {
+      "epoch": 0.09832,
+      "grad_norm": 1.144386517076489,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 9832
+    },
+    {
+      "epoch": 0.09833,
+      "grad_norm": 1.246511769111947,
+      "learning_rate": 0.003,
+      "loss": 4.0599,
+      "step": 9833
+    },
+    {
+      "epoch": 0.09834,
+      "grad_norm": 1.13349377739888,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 9834
+    },
+    {
+      "epoch": 0.09835,
+      "grad_norm": 1.0894251468551766,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 9835
+    },
+    {
+      "epoch": 0.09836,
+      "grad_norm": 1.1354298533553584,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 9836
+    },
+    {
+      "epoch": 0.09837,
+      "grad_norm": 1.1000173143853216,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 9837
+    },
+    {
+      "epoch": 0.09838,
+      "grad_norm": 1.119210791465233,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 9838
+    },
+    {
+      "epoch": 0.09839,
+      "grad_norm": 0.9524635366093233,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 9839
+    },
+    {
+      "epoch": 0.0984,
+      "grad_norm": 1.0797554846674096,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 9840
+    },
+    {
+      "epoch": 0.09841,
+      "grad_norm": 1.1411790679575085,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 9841
+    },
+    {
+      "epoch": 0.09842,
+      "grad_norm": 1.1281791126318816,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 9842
+    },
+    {
+      "epoch": 0.09843,
+      "grad_norm": 0.9134744520495551,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 9843
+    },
+    {
+      "epoch": 0.09844,
+      "grad_norm": 1.0160022299473974,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 9844
+    },
+    {
+      "epoch": 0.09845,
+      "grad_norm": 1.3428743136543202,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 9845
+    },
+    {
+      "epoch": 0.09846,
+      "grad_norm": 1.0189991251253638,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 9846
+    },
+    {
+      "epoch": 0.09847,
+      "grad_norm": 1.1704243872927704,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 9847
+    },
+    {
+      "epoch": 0.09848,
+      "grad_norm": 0.9081125793690473,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 9848
+    },
+    {
+      "epoch": 0.09849,
+      "grad_norm": 1.0336695774661442,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 9849
+    },
+    {
+      "epoch": 0.0985,
+      "grad_norm": 1.185375263926777,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 9850
+    },
+    {
+      "epoch": 0.09851,
+      "grad_norm": 1.1305392937955534,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 9851
+    },
+    {
+      "epoch": 0.09852,
+      "grad_norm": 0.9581592333628471,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 9852
+    },
+    {
+      "epoch": 0.09853,
+      "grad_norm": 1.2114861648250734,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 9853
+    },
+    {
+      "epoch": 0.09854,
+      "grad_norm": 1.0284507718101719,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 9854
+    },
+    {
+      "epoch": 0.09855,
+      "grad_norm": 1.2232319748941225,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 9855
+    },
+    {
+      "epoch": 0.09856,
+      "grad_norm": 1.0855515860790759,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 9856
+    },
+    {
+      "epoch": 0.09857,
+      "grad_norm": 1.0753853243294782,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 9857
+    },
+    {
+      "epoch": 0.09858,
+      "grad_norm": 1.206054994842722,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 9858
+    },
+    {
+      "epoch": 0.09859,
+      "grad_norm": 1.098563875256706,
+      "learning_rate": 0.003,
+      "loss": 4.0698,
+      "step": 9859
+    },
+    {
+      "epoch": 0.0986,
+      "grad_norm": 1.1139961945407197,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 9860
+    },
+    {
+      "epoch": 0.09861,
+      "grad_norm": 1.1965232138904764,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 9861
+    },
+    {
+      "epoch": 0.09862,
+      "grad_norm": 1.1112915398055838,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 9862
+    },
+    {
+      "epoch": 0.09863,
+      "grad_norm": 1.1765592866838956,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 9863
+    },
+    {
+      "epoch": 0.09864,
+      "grad_norm": 1.281604965271072,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 9864
+    },
+    {
+      "epoch": 0.09865,
+      "grad_norm": 0.9474747599256009,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 9865
+    },
+    {
+      "epoch": 0.09866,
+      "grad_norm": 1.2526292758671924,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 9866
+    },
+    {
+      "epoch": 0.09867,
+      "grad_norm": 0.9782340542182837,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 9867
+    },
+    {
+      "epoch": 0.09868,
+      "grad_norm": 1.2384674527105715,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 9868
+    },
+    {
+      "epoch": 0.09869,
+      "grad_norm": 1.1272829215817646,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 9869
+    },
+    {
+      "epoch": 0.0987,
+      "grad_norm": 1.0396668913063039,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 9870
+    },
+    {
+      "epoch": 0.09871,
+      "grad_norm": 1.037702753803307,
+      "learning_rate": 0.003,
+      "loss": 4.0904,
+      "step": 9871
+    },
+    {
+      "epoch": 0.09872,
+      "grad_norm": 1.1864414611980871,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 9872
+    },
+    {
+      "epoch": 0.09873,
+      "grad_norm": 1.0076047434777218,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 9873
+    },
+    {
+      "epoch": 0.09874,
+      "grad_norm": 1.1197613528576318,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 9874
+    },
+    {
+      "epoch": 0.09875,
+      "grad_norm": 0.9686098749546355,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 9875
+    },
+    {
+      "epoch": 0.09876,
+      "grad_norm": 1.04278415670397,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 9876
+    },
+    {
+      "epoch": 0.09877,
+      "grad_norm": 1.402750496107436,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 9877
+    },
+    {
+      "epoch": 0.09878,
+      "grad_norm": 1.2330935372023277,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 9878
+    },
+    {
+      "epoch": 0.09879,
+      "grad_norm": 1.025634526463866,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 9879
+    },
+    {
+      "epoch": 0.0988,
+      "grad_norm": 1.0537640601114184,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 9880
+    },
+    {
+      "epoch": 0.09881,
+      "grad_norm": 1.2290027208808174,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 9881
+    },
+    {
+      "epoch": 0.09882,
+      "grad_norm": 0.8930767525332688,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 9882
+    },
+    {
+      "epoch": 0.09883,
+      "grad_norm": 1.0530103820851344,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 9883
+    },
+    {
+      "epoch": 0.09884,
+      "grad_norm": 1.2191490701919565,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 9884
+    },
+    {
+      "epoch": 0.09885,
+      "grad_norm": 1.0218706297933426,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 9885
+    },
+    {
+      "epoch": 0.09886,
+      "grad_norm": 1.150967177807416,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 9886
+    },
+    {
+      "epoch": 0.09887,
+      "grad_norm": 1.129373196803706,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 9887
+    },
+    {
+      "epoch": 0.09888,
+      "grad_norm": 1.149004087106683,
+      "learning_rate": 0.003,
+      "loss": 4.0685,
+      "step": 9888
+    },
+    {
+      "epoch": 0.09889,
+      "grad_norm": 1.2702764689102495,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 9889
+    },
+    {
+      "epoch": 0.0989,
+      "grad_norm": 1.2676550107001645,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 9890
+    },
+    {
+      "epoch": 0.09891,
+      "grad_norm": 1.2615471554776427,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 9891
+    },
+    {
+      "epoch": 0.09892,
+      "grad_norm": 1.1167316738746,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 9892
+    },
+    {
+      "epoch": 0.09893,
+      "grad_norm": 0.9054671662434778,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 9893
+    },
+    {
+      "epoch": 0.09894,
+      "grad_norm": 0.9236484052010723,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 9894
+    },
+    {
+      "epoch": 0.09895,
+      "grad_norm": 1.208985347287342,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 9895
+    },
+    {
+      "epoch": 0.09896,
+      "grad_norm": 1.061049368371955,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 9896
+    },
+    {
+      "epoch": 0.09897,
+      "grad_norm": 1.329238894639678,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 9897
+    },
+    {
+      "epoch": 0.09898,
+      "grad_norm": 0.9855278608052852,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 9898
+    },
+    {
+      "epoch": 0.09899,
+      "grad_norm": 1.1077024624762888,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 9899
+    },
+    {
+      "epoch": 0.099,
+      "grad_norm": 1.206281945713825,
+      "learning_rate": 0.003,
+      "loss": 4.0618,
+      "step": 9900
+    },
+    {
+      "epoch": 0.09901,
+      "grad_norm": 1.1299933707769678,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 9901
+    },
+    {
+      "epoch": 0.09902,
+      "grad_norm": 1.195651589757622,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 9902
+    },
+    {
+      "epoch": 0.09903,
+      "grad_norm": 0.9114394972258246,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 9903
+    },
+    {
+      "epoch": 0.09904,
+      "grad_norm": 0.8585107487801131,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 9904
+    },
+    {
+      "epoch": 0.09905,
+      "grad_norm": 0.8579026965268124,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 9905
+    },
+    {
+      "epoch": 0.09906,
+      "grad_norm": 0.9985171119339783,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 9906
+    },
+    {
+      "epoch": 0.09907,
+      "grad_norm": 1.1354189296859807,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 9907
+    },
+    {
+      "epoch": 0.09908,
+      "grad_norm": 1.0976033717968405,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 9908
+    },
+    {
+      "epoch": 0.09909,
+      "grad_norm": 1.1043513109244145,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 9909
+    },
+    {
+      "epoch": 0.0991,
+      "grad_norm": 1.2913134989756772,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 9910
+    },
+    {
+      "epoch": 0.09911,
+      "grad_norm": 1.0885873929556769,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 9911
+    },
+    {
+      "epoch": 0.09912,
+      "grad_norm": 1.439636649962891,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 9912
+    },
+    {
+      "epoch": 0.09913,
+      "grad_norm": 0.9927728387511651,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 9913
+    },
+    {
+      "epoch": 0.09914,
+      "grad_norm": 1.224689926683272,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 9914
+    },
+    {
+      "epoch": 0.09915,
+      "grad_norm": 1.0185052815824782,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 9915
+    },
+    {
+      "epoch": 0.09916,
+      "grad_norm": 1.1039015304713733,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 9916
+    },
+    {
+      "epoch": 0.09917,
+      "grad_norm": 0.9560283744143093,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 9917
+    },
+    {
+      "epoch": 0.09918,
+      "grad_norm": 1.249009304246015,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 9918
+    },
+    {
+      "epoch": 0.09919,
+      "grad_norm": 1.0276598807835757,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 9919
+    },
+    {
+      "epoch": 0.0992,
+      "grad_norm": 1.4384067882969795,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 9920
+    },
+    {
+      "epoch": 0.09921,
+      "grad_norm": 1.0855607508489593,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 9921
+    },
+    {
+      "epoch": 0.09922,
+      "grad_norm": 1.3281753668853509,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 9922
+    },
+    {
+      "epoch": 0.09923,
+      "grad_norm": 1.3210796402306015,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 9923
+    },
+    {
+      "epoch": 0.09924,
+      "grad_norm": 0.9624480778672281,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 9924
+    },
+    {
+      "epoch": 0.09925,
+      "grad_norm": 1.0010419815015703,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 9925
+    },
+    {
+      "epoch": 0.09926,
+      "grad_norm": 1.481720653662113,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 9926
+    },
+    {
+      "epoch": 0.09927,
+      "grad_norm": 0.9737956838613526,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 9927
+    },
+    {
+      "epoch": 0.09928,
+      "grad_norm": 1.0743465025663415,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 9928
+    },
+    {
+      "epoch": 0.09929,
+      "grad_norm": 1.0978928933680199,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 9929
+    },
+    {
+      "epoch": 0.0993,
+      "grad_norm": 1.154918711721139,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 9930
+    },
+    {
+      "epoch": 0.09931,
+      "grad_norm": 1.292932588277131,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 9931
+    },
+    {
+      "epoch": 0.09932,
+      "grad_norm": 1.068387070754382,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 9932
+    },
+    {
+      "epoch": 0.09933,
+      "grad_norm": 1.0193039042669239,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 9933
+    },
+    {
+      "epoch": 0.09934,
+      "grad_norm": 1.0548178768539755,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 9934
+    },
+    {
+      "epoch": 0.09935,
+      "grad_norm": 1.1606839398396906,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 9935
+    },
+    {
+      "epoch": 0.09936,
+      "grad_norm": 1.2692975261466353,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 9936
+    },
+    {
+      "epoch": 0.09937,
+      "grad_norm": 1.0782042799196752,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 9937
+    },
+    {
+      "epoch": 0.09938,
+      "grad_norm": 1.127980051105687,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 9938
+    },
+    {
+      "epoch": 0.09939,
+      "grad_norm": 1.0287441783269569,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 9939
+    },
+    {
+      "epoch": 0.0994,
+      "grad_norm": 1.2132424703891527,
+      "learning_rate": 0.003,
+      "loss": 4.0629,
+      "step": 9940
+    },
+    {
+      "epoch": 0.09941,
+      "grad_norm": 0.9732597157956879,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 9941
+    },
+    {
+      "epoch": 0.09942,
+      "grad_norm": 1.1389068598359773,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 9942
+    },
+    {
+      "epoch": 0.09943,
+      "grad_norm": 1.102208282726071,
+      "learning_rate": 0.003,
+      "loss": 3.9767,
+      "step": 9943
+    },
+    {
+      "epoch": 0.09944,
+      "grad_norm": 1.1127584490971916,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 9944
+    },
+    {
+      "epoch": 0.09945,
+      "grad_norm": 1.1635207514619523,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 9945
+    },
+    {
+      "epoch": 0.09946,
+      "grad_norm": 1.0595853146121823,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 9946
+    },
+    {
+      "epoch": 0.09947,
+      "grad_norm": 1.2408973624096764,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 9947
+    },
+    {
+      "epoch": 0.09948,
+      "grad_norm": 1.2344813666227283,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 9948
+    },
+    {
+      "epoch": 0.09949,
+      "grad_norm": 1.1766030065853175,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 9949
+    },
+    {
+      "epoch": 0.0995,
+      "grad_norm": 1.0973536641516977,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 9950
+    },
+    {
+      "epoch": 0.09951,
+      "grad_norm": 1.0236399693187321,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 9951
+    },
+    {
+      "epoch": 0.09952,
+      "grad_norm": 0.9915712705437711,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 9952
+    },
+    {
+      "epoch": 0.09953,
+      "grad_norm": 1.1963753411506206,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 9953
+    },
+    {
+      "epoch": 0.09954,
+      "grad_norm": 1.104880815250887,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 9954
+    },
+    {
+      "epoch": 0.09955,
+      "grad_norm": 1.1708956908608728,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 9955
+    },
+    {
+      "epoch": 0.09956,
+      "grad_norm": 0.9866423739533496,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 9956
+    },
+    {
+      "epoch": 0.09957,
+      "grad_norm": 1.3165503592770573,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 9957
+    },
+    {
+      "epoch": 0.09958,
+      "grad_norm": 0.9536837415813932,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 9958
+    },
+    {
+      "epoch": 0.09959,
+      "grad_norm": 1.3409977002707099,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 9959
+    },
+    {
+      "epoch": 0.0996,
+      "grad_norm": 0.9814138686721887,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 9960
+    },
+    {
+      "epoch": 0.09961,
+      "grad_norm": 1.3803176304264348,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 9961
+    },
+    {
+      "epoch": 0.09962,
+      "grad_norm": 1.1780995573672939,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 9962
+    },
+    {
+      "epoch": 0.09963,
+      "grad_norm": 1.0505789057253156,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 9963
+    },
+    {
+      "epoch": 0.09964,
+      "grad_norm": 1.2577481781152686,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 9964
+    },
+    {
+      "epoch": 0.09965,
+      "grad_norm": 0.8750383187391458,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 9965
+    },
+    {
+      "epoch": 0.09966,
+      "grad_norm": 0.9324526260136522,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 9966
+    },
+    {
+      "epoch": 0.09967,
+      "grad_norm": 1.3064826835022834,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 9967
+    },
+    {
+      "epoch": 0.09968,
+      "grad_norm": 1.0488845951972847,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 9968
+    },
+    {
+      "epoch": 0.09969,
+      "grad_norm": 1.2753549754302325,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 9969
+    },
+    {
+      "epoch": 0.0997,
+      "grad_norm": 1.039234075567994,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 9970
+    },
+    {
+      "epoch": 0.09971,
+      "grad_norm": 1.2214075051221316,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 9971
+    },
+    {
+      "epoch": 0.09972,
+      "grad_norm": 1.1623111884041941,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 9972
+    },
+    {
+      "epoch": 0.09973,
+      "grad_norm": 1.1088110650963057,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 9973
+    },
+    {
+      "epoch": 0.09974,
+      "grad_norm": 1.0426875423604298,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 9974
+    },
+    {
+      "epoch": 0.09975,
+      "grad_norm": 1.259426553612719,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 9975
+    },
+    {
+      "epoch": 0.09976,
+      "grad_norm": 1.0298690402873982,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 9976
+    },
+    {
+      "epoch": 0.09977,
+      "grad_norm": 1.3610931313558177,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 9977
+    },
+    {
+      "epoch": 0.09978,
+      "grad_norm": 0.9132158967205964,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 9978
+    },
+    {
+      "epoch": 0.09979,
+      "grad_norm": 1.219498068275462,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 9979
+    },
+    {
+      "epoch": 0.0998,
+      "grad_norm": 1.1274470393928808,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 9980
+    },
+    {
+      "epoch": 0.09981,
+      "grad_norm": 1.1231505756772355,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 9981
+    },
+    {
+      "epoch": 0.09982,
+      "grad_norm": 1.1169231509650528,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 9982
+    },
+    {
+      "epoch": 0.09983,
+      "grad_norm": 1.1018331890490325,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 9983
+    },
+    {
+      "epoch": 0.09984,
+      "grad_norm": 1.0714964487278968,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 9984
+    },
+    {
+      "epoch": 0.09985,
+      "grad_norm": 1.1863401109530898,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 9985
+    },
+    {
+      "epoch": 0.09986,
+      "grad_norm": 1.042279602365793,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 9986
+    },
+    {
+      "epoch": 0.09987,
+      "grad_norm": 1.2665835602618778,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 9987
+    },
+    {
+      "epoch": 0.09988,
+      "grad_norm": 0.89345425739561,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 9988
+    },
+    {
+      "epoch": 0.09989,
+      "grad_norm": 1.1833757769860112,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 9989
+    },
+    {
+      "epoch": 0.0999,
+      "grad_norm": 1.2356695097233596,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 9990
+    },
+    {
+      "epoch": 0.09991,
+      "grad_norm": 1.1575667617820786,
+      "learning_rate": 0.003,
+      "loss": 4.0544,
+      "step": 9991
+    },
+    {
+      "epoch": 0.09992,
+      "grad_norm": 1.0547400553533788,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 9992
+    },
+    {
+      "epoch": 0.09993,
+      "grad_norm": 1.1235377413439607,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 9993
+    },
+    {
+      "epoch": 0.09994,
+      "grad_norm": 1.0785414820906185,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 9994
+    },
+    {
+      "epoch": 0.09995,
+      "grad_norm": 1.2688086844276698,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 9995
+    },
+    {
+      "epoch": 0.09996,
+      "grad_norm": 0.9896774509795319,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 9996
+    },
+    {
+      "epoch": 0.09997,
+      "grad_norm": 1.4769420952220298,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 9997
+    },
+    {
+      "epoch": 0.09998,
+      "grad_norm": 1.1514423885404736,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 9998
+    },
+    {
+      "epoch": 0.09999,
+      "grad_norm": 0.9975467451325027,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 9999
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 1.0688722481962825,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 10000
+    },
+    {
+      "epoch": 0.10001,
+      "grad_norm": 1.0376217486708665,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 10001
+    },
+    {
+      "epoch": 0.10002,
+      "grad_norm": 1.1705774677906358,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 10002
+    },
+    {
+      "epoch": 0.10003,
+      "grad_norm": 0.9861302809355035,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 10003
+    },
+    {
+      "epoch": 0.10004,
+      "grad_norm": 1.1789334001801655,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 10004
+    },
+    {
+      "epoch": 0.10005,
+      "grad_norm": 1.1989313194786173,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 10005
+    },
+    {
+      "epoch": 0.10006,
+      "grad_norm": 0.9685441038364853,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 10006
+    },
+    {
+      "epoch": 0.10007,
+      "grad_norm": 1.1406363380895599,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 10007
+    },
+    {
+      "epoch": 0.10008,
+      "grad_norm": 1.1401310833512326,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 10008
+    },
+    {
+      "epoch": 0.10009,
+      "grad_norm": 1.2874194973352402,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 10009
+    },
+    {
+      "epoch": 0.1001,
+      "grad_norm": 0.9395473569554981,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 10010
+    },
+    {
+      "epoch": 0.10011,
+      "grad_norm": 1.1533792238419305,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 10011
+    },
+    {
+      "epoch": 0.10012,
+      "grad_norm": 1.1011717780365395,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 10012
+    },
+    {
+      "epoch": 0.10013,
+      "grad_norm": 1.1016950112639643,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 10013
+    },
+    {
+      "epoch": 0.10014,
+      "grad_norm": 1.1527773791821763,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 10014
+    },
+    {
+      "epoch": 0.10015,
+      "grad_norm": 1.1942046889073201,
+      "learning_rate": 0.003,
+      "loss": 4.0643,
+      "step": 10015
+    },
+    {
+      "epoch": 0.10016,
+      "grad_norm": 0.957925342866834,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 10016
+    },
+    {
+      "epoch": 0.10017,
+      "grad_norm": 1.15252875423962,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 10017
+    },
+    {
+      "epoch": 0.10018,
+      "grad_norm": 1.0374379123877617,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 10018
+    },
+    {
+      "epoch": 0.10019,
+      "grad_norm": 1.1794252200816104,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 10019
+    },
+    {
+      "epoch": 0.1002,
+      "grad_norm": 1.5203356193802966,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 10020
+    },
+    {
+      "epoch": 0.10021,
+      "grad_norm": 1.0029056619089192,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 10021
+    },
+    {
+      "epoch": 0.10022,
+      "grad_norm": 1.3936100699293918,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 10022
+    },
+    {
+      "epoch": 0.10023,
+      "grad_norm": 0.9821509836904537,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 10023
+    },
+    {
+      "epoch": 0.10024,
+      "grad_norm": 1.0077875906773899,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 10024
+    },
+    {
+      "epoch": 0.10025,
+      "grad_norm": 1.2413891542340003,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 10025
+    },
+    {
+      "epoch": 0.10026,
+      "grad_norm": 0.9367455732537656,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 10026
+    },
+    {
+      "epoch": 0.10027,
+      "grad_norm": 1.0593921888120654,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 10027
+    },
+    {
+      "epoch": 0.10028,
+      "grad_norm": 1.167695592578088,
+      "learning_rate": 0.003,
+      "loss": 4.0697,
+      "step": 10028
+    },
+    {
+      "epoch": 0.10029,
+      "grad_norm": 1.1890588826725337,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 10029
+    },
+    {
+      "epoch": 0.1003,
+      "grad_norm": 1.1200344528615116,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 10030
+    },
+    {
+      "epoch": 0.10031,
+      "grad_norm": 1.1417105447998874,
+      "learning_rate": 0.003,
+      "loss": 4.0708,
+      "step": 10031
+    },
+    {
+      "epoch": 0.10032,
+      "grad_norm": 1.0185250099652587,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 10032
+    },
+    {
+      "epoch": 0.10033,
+      "grad_norm": 1.370599313867179,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 10033
+    },
+    {
+      "epoch": 0.10034,
+      "grad_norm": 1.1853235683781365,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 10034
+    },
+    {
+      "epoch": 0.10035,
+      "grad_norm": 1.063599029412153,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 10035
+    },
+    {
+      "epoch": 0.10036,
+      "grad_norm": 1.3175134033811957,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 10036
+    },
+    {
+      "epoch": 0.10037,
+      "grad_norm": 1.0960419587396677,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 10037
+    },
+    {
+      "epoch": 0.10038,
+      "grad_norm": 1.0317353100182982,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 10038
+    },
+    {
+      "epoch": 0.10039,
+      "grad_norm": 1.0757103090572837,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 10039
+    },
+    {
+      "epoch": 0.1004,
+      "grad_norm": 1.087521733469096,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 10040
+    },
+    {
+      "epoch": 0.10041,
+      "grad_norm": 1.2084731171411833,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 10041
+    },
+    {
+      "epoch": 0.10042,
+      "grad_norm": 0.9946783035892525,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 10042
+    },
+    {
+      "epoch": 0.10043,
+      "grad_norm": 1.4298803409305711,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 10043
+    },
+    {
+      "epoch": 0.10044,
+      "grad_norm": 0.9427138046048616,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 10044
+    },
+    {
+      "epoch": 0.10045,
+      "grad_norm": 1.1440424909635767,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 10045
+    },
+    {
+      "epoch": 0.10046,
+      "grad_norm": 1.2208645128728925,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 10046
+    },
+    {
+      "epoch": 0.10047,
+      "grad_norm": 1.1520779920124797,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 10047
+    },
+    {
+      "epoch": 0.10048,
+      "grad_norm": 1.1859885677535336,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 10048
+    },
+    {
+      "epoch": 0.10049,
+      "grad_norm": 1.1159990063667586,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 10049
+    },
+    {
+      "epoch": 0.1005,
+      "grad_norm": 1.1289846456525583,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 10050
+    },
+    {
+      "epoch": 0.10051,
+      "grad_norm": 1.113938198451074,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 10051
+    },
+    {
+      "epoch": 0.10052,
+      "grad_norm": 1.108899705969844,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 10052
+    },
+    {
+      "epoch": 0.10053,
+      "grad_norm": 1.3756042460285796,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 10053
+    },
+    {
+      "epoch": 0.10054,
+      "grad_norm": 0.9418379581640134,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 10054
+    },
+    {
+      "epoch": 0.10055,
+      "grad_norm": 1.1739862813921886,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 10055
+    },
+    {
+      "epoch": 0.10056,
+      "grad_norm": 0.9555340144478537,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 10056
+    },
+    {
+      "epoch": 0.10057,
+      "grad_norm": 1.1683761393937124,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 10057
+    },
+    {
+      "epoch": 0.10058,
+      "grad_norm": 0.9254542092215158,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 10058
+    },
+    {
+      "epoch": 0.10059,
+      "grad_norm": 1.0185610648270182,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 10059
+    },
+    {
+      "epoch": 0.1006,
+      "grad_norm": 1.1877134215630414,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 10060
+    },
+    {
+      "epoch": 0.10061,
+      "grad_norm": 1.1455483826531043,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 10061
+    },
+    {
+      "epoch": 0.10062,
+      "grad_norm": 1.2513080657957403,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 10062
+    },
+    {
+      "epoch": 0.10063,
+      "grad_norm": 1.2124912360540205,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 10063
+    },
+    {
+      "epoch": 0.10064,
+      "grad_norm": 1.1274860749990432,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 10064
+    },
+    {
+      "epoch": 0.10065,
+      "grad_norm": 1.2298310787584599,
+      "learning_rate": 0.003,
+      "loss": 4.0579,
+      "step": 10065
+    },
+    {
+      "epoch": 0.10066,
+      "grad_norm": 0.9103543254343075,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 10066
+    },
+    {
+      "epoch": 0.10067,
+      "grad_norm": 1.0030930500020137,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 10067
+    },
+    {
+      "epoch": 0.10068,
+      "grad_norm": 1.2779301791387994,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 10068
+    },
+    {
+      "epoch": 0.10069,
+      "grad_norm": 0.8818316506876382,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 10069
+    },
+    {
+      "epoch": 0.1007,
+      "grad_norm": 0.9710635888596929,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 10070
+    },
+    {
+      "epoch": 0.10071,
+      "grad_norm": 1.2723709892951096,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 10071
+    },
+    {
+      "epoch": 0.10072,
+      "grad_norm": 1.0605681039514947,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 10072
+    },
+    {
+      "epoch": 0.10073,
+      "grad_norm": 1.2166085354095868,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 10073
+    },
+    {
+      "epoch": 0.10074,
+      "grad_norm": 1.2473625383962492,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 10074
+    },
+    {
+      "epoch": 0.10075,
+      "grad_norm": 1.2226275619264804,
+      "learning_rate": 0.003,
+      "loss": 4.0637,
+      "step": 10075
+    },
+    {
+      "epoch": 0.10076,
+      "grad_norm": 1.229825772234811,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 10076
+    },
+    {
+      "epoch": 0.10077,
+      "grad_norm": 0.9456387474821364,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 10077
+    },
+    {
+      "epoch": 0.10078,
+      "grad_norm": 1.1385609146582953,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 10078
+    },
+    {
+      "epoch": 0.10079,
+      "grad_norm": 1.2697352120922631,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 10079
+    },
+    {
+      "epoch": 0.1008,
+      "grad_norm": 1.4025860537648713,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 10080
+    },
+    {
+      "epoch": 0.10081,
+      "grad_norm": 1.0216197686884323,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 10081
+    },
+    {
+      "epoch": 0.10082,
+      "grad_norm": 1.132424109176119,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 10082
+    },
+    {
+      "epoch": 0.10083,
+      "grad_norm": 1.021553444751305,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 10083
+    },
+    {
+      "epoch": 0.10084,
+      "grad_norm": 1.148442579664875,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 10084
+    },
+    {
+      "epoch": 0.10085,
+      "grad_norm": 1.0104285734886607,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 10085
+    },
+    {
+      "epoch": 0.10086,
+      "grad_norm": 1.1379868586708077,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 10086
+    },
+    {
+      "epoch": 0.10087,
+      "grad_norm": 1.186592736125496,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 10087
+    },
+    {
+      "epoch": 0.10088,
+      "grad_norm": 1.0060310853482595,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 10088
+    },
+    {
+      "epoch": 0.10089,
+      "grad_norm": 1.0640640849166343,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 10089
+    },
+    {
+      "epoch": 0.1009,
+      "grad_norm": 1.3560044661047932,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 10090
+    },
+    {
+      "epoch": 0.10091,
+      "grad_norm": 1.1078241142090288,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 10091
+    },
+    {
+      "epoch": 0.10092,
+      "grad_norm": 1.0068291505885563,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 10092
+    },
+    {
+      "epoch": 0.10093,
+      "grad_norm": 1.0599705107020405,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 10093
+    },
+    {
+      "epoch": 0.10094,
+      "grad_norm": 1.0876529083055502,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 10094
+    },
+    {
+      "epoch": 0.10095,
+      "grad_norm": 1.0961661282027626,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 10095
+    },
+    {
+      "epoch": 0.10096,
+      "grad_norm": 1.374941789482069,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 10096
+    },
+    {
+      "epoch": 0.10097,
+      "grad_norm": 0.9082696715295778,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 10097
+    },
+    {
+      "epoch": 0.10098,
+      "grad_norm": 1.0771725339779925,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 10098
+    },
+    {
+      "epoch": 0.10099,
+      "grad_norm": 1.0182992509469744,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 10099
+    },
+    {
+      "epoch": 0.101,
+      "grad_norm": 1.1428363685009697,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 10100
+    },
+    {
+      "epoch": 0.10101,
+      "grad_norm": 0.9694039996712733,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 10101
+    },
+    {
+      "epoch": 0.10102,
+      "grad_norm": 1.0472023173402798,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 10102
+    },
+    {
+      "epoch": 0.10103,
+      "grad_norm": 1.312075296718054,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 10103
+    },
+    {
+      "epoch": 0.10104,
+      "grad_norm": 1.069956052695916,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 10104
+    },
+    {
+      "epoch": 0.10105,
+      "grad_norm": 1.2931704494456024,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 10105
+    },
+    {
+      "epoch": 0.10106,
+      "grad_norm": 1.1208723791794828,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 10106
+    },
+    {
+      "epoch": 0.10107,
+      "grad_norm": 1.1935123431071022,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 10107
+    },
+    {
+      "epoch": 0.10108,
+      "grad_norm": 1.1350366451944691,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 10108
+    },
+    {
+      "epoch": 0.10109,
+      "grad_norm": 1.3607305740575226,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 10109
+    },
+    {
+      "epoch": 0.1011,
+      "grad_norm": 0.9949435252832233,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 10110
+    },
+    {
+      "epoch": 0.10111,
+      "grad_norm": 1.2015497083429867,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 10111
+    },
+    {
+      "epoch": 0.10112,
+      "grad_norm": 1.0762531833294637,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 10112
+    },
+    {
+      "epoch": 0.10113,
+      "grad_norm": 1.3607307522223708,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 10113
+    },
+    {
+      "epoch": 0.10114,
+      "grad_norm": 0.8372866022166425,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 10114
+    },
+    {
+      "epoch": 0.10115,
+      "grad_norm": 1.169337588013949,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 10115
+    },
+    {
+      "epoch": 0.10116,
+      "grad_norm": 1.1177277939964916,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 10116
+    },
+    {
+      "epoch": 0.10117,
+      "grad_norm": 1.1778175104666457,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 10117
+    },
+    {
+      "epoch": 0.10118,
+      "grad_norm": 1.120506464897047,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 10118
+    },
+    {
+      "epoch": 0.10119,
+      "grad_norm": 1.1474574001683169,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 10119
+    },
+    {
+      "epoch": 0.1012,
+      "grad_norm": 0.8783084850811431,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 10120
+    },
+    {
+      "epoch": 0.10121,
+      "grad_norm": 0.9780743863814633,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 10121
+    },
+    {
+      "epoch": 0.10122,
+      "grad_norm": 1.235259685072935,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 10122
+    },
+    {
+      "epoch": 0.10123,
+      "grad_norm": 0.8532144497138836,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 10123
+    },
+    {
+      "epoch": 0.10124,
+      "grad_norm": 1.0588961816914821,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 10124
+    },
+    {
+      "epoch": 0.10125,
+      "grad_norm": 1.0997888688267687,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 10125
+    },
+    {
+      "epoch": 0.10126,
+      "grad_norm": 1.012973351895914,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 10126
+    },
+    {
+      "epoch": 0.10127,
+      "grad_norm": 1.1601701911571694,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 10127
+    },
+    {
+      "epoch": 0.10128,
+      "grad_norm": 0.8661155150916877,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 10128
+    },
+    {
+      "epoch": 0.10129,
+      "grad_norm": 1.01187696037196,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 10129
+    },
+    {
+      "epoch": 0.1013,
+      "grad_norm": 1.377560868899849,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 10130
+    },
+    {
+      "epoch": 0.10131,
+      "grad_norm": 1.2225045415187876,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 10131
+    },
+    {
+      "epoch": 0.10132,
+      "grad_norm": 1.0805752380760862,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 10132
+    },
+    {
+      "epoch": 0.10133,
+      "grad_norm": 1.1689334091154489,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 10133
+    },
+    {
+      "epoch": 0.10134,
+      "grad_norm": 1.3581198813392088,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 10134
+    },
+    {
+      "epoch": 0.10135,
+      "grad_norm": 0.7676340885206456,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 10135
+    },
+    {
+      "epoch": 0.10136,
+      "grad_norm": 1.0234587289832506,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 10136
+    },
+    {
+      "epoch": 0.10137,
+      "grad_norm": 1.301339315671966,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 10137
+    },
+    {
+      "epoch": 0.10138,
+      "grad_norm": 0.9786141586414955,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 10138
+    },
+    {
+      "epoch": 0.10139,
+      "grad_norm": 1.227946401625274,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 10139
+    },
+    {
+      "epoch": 0.1014,
+      "grad_norm": 1.1398058430097253,
+      "learning_rate": 0.003,
+      "loss": 4.0602,
+      "step": 10140
+    },
+    {
+      "epoch": 0.10141,
+      "grad_norm": 1.2511523411004795,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 10141
+    },
+    {
+      "epoch": 0.10142,
+      "grad_norm": 1.0530250525908387,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 10142
+    },
+    {
+      "epoch": 0.10143,
+      "grad_norm": 1.2429388861453106,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 10143
+    },
+    {
+      "epoch": 0.10144,
+      "grad_norm": 0.8865877007140897,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 10144
+    },
+    {
+      "epoch": 0.10145,
+      "grad_norm": 1.0163852178819597,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 10145
+    },
+    {
+      "epoch": 0.10146,
+      "grad_norm": 1.3657614044980302,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 10146
+    },
+    {
+      "epoch": 0.10147,
+      "grad_norm": 0.9950226203446018,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 10147
+    },
+    {
+      "epoch": 0.10148,
+      "grad_norm": 1.1980345153961933,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 10148
+    },
+    {
+      "epoch": 0.10149,
+      "grad_norm": 1.019226564296152,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 10149
+    },
+    {
+      "epoch": 0.1015,
+      "grad_norm": 0.965993025404742,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 10150
+    },
+    {
+      "epoch": 0.10151,
+      "grad_norm": 1.3679875752682615,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 10151
+    },
+    {
+      "epoch": 0.10152,
+      "grad_norm": 1.0149437487586224,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 10152
+    },
+    {
+      "epoch": 0.10153,
+      "grad_norm": 1.3162415560624259,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 10153
+    },
+    {
+      "epoch": 0.10154,
+      "grad_norm": 1.0889175982737318,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 10154
+    },
+    {
+      "epoch": 0.10155,
+      "grad_norm": 1.108256381768691,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 10155
+    },
+    {
+      "epoch": 0.10156,
+      "grad_norm": 1.0449850400339091,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 10156
+    },
+    {
+      "epoch": 0.10157,
+      "grad_norm": 1.2857069143842972,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 10157
+    },
+    {
+      "epoch": 0.10158,
+      "grad_norm": 0.96497871026518,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 10158
+    },
+    {
+      "epoch": 0.10159,
+      "grad_norm": 1.2412464612862566,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 10159
+    },
+    {
+      "epoch": 0.1016,
+      "grad_norm": 1.0649011421878327,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 10160
+    },
+    {
+      "epoch": 0.10161,
+      "grad_norm": 1.3828387996970612,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 10161
+    },
+    {
+      "epoch": 0.10162,
+      "grad_norm": 1.0268495952079426,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 10162
+    },
+    {
+      "epoch": 0.10163,
+      "grad_norm": 1.2209399315758185,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 10163
+    },
+    {
+      "epoch": 0.10164,
+      "grad_norm": 1.0382424289978558,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 10164
+    },
+    {
+      "epoch": 0.10165,
+      "grad_norm": 1.1431393692734733,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 10165
+    },
+    {
+      "epoch": 0.10166,
+      "grad_norm": 1.025676794642058,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 10166
+    },
+    {
+      "epoch": 0.10167,
+      "grad_norm": 1.0773528056179977,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 10167
+    },
+    {
+      "epoch": 0.10168,
+      "grad_norm": 0.9799372640648593,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 10168
+    },
+    {
+      "epoch": 0.10169,
+      "grad_norm": 1.3573925168119307,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 10169
+    },
+    {
+      "epoch": 0.1017,
+      "grad_norm": 1.069207482050234,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 10170
+    },
+    {
+      "epoch": 0.10171,
+      "grad_norm": 1.1246476134224932,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 10171
+    },
+    {
+      "epoch": 0.10172,
+      "grad_norm": 1.0443602726524843,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 10172
+    },
+    {
+      "epoch": 0.10173,
+      "grad_norm": 1.2312653223111745,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 10173
+    },
+    {
+      "epoch": 0.10174,
+      "grad_norm": 1.1122257381493827,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 10174
+    },
+    {
+      "epoch": 0.10175,
+      "grad_norm": 1.3749066864648898,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 10175
+    },
+    {
+      "epoch": 0.10176,
+      "grad_norm": 0.9339307181884096,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 10176
+    },
+    {
+      "epoch": 0.10177,
+      "grad_norm": 0.8986097272545992,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 10177
+    },
+    {
+      "epoch": 0.10178,
+      "grad_norm": 0.9946058914616095,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 10178
+    },
+    {
+      "epoch": 0.10179,
+      "grad_norm": 1.499674078146875,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 10179
+    },
+    {
+      "epoch": 0.1018,
+      "grad_norm": 0.9496966180142791,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 10180
+    },
+    {
+      "epoch": 0.10181,
+      "grad_norm": 1.311618816110452,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 10181
+    },
+    {
+      "epoch": 0.10182,
+      "grad_norm": 1.0708634479960368,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 10182
+    },
+    {
+      "epoch": 0.10183,
+      "grad_norm": 1.1113870081016295,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 10183
+    },
+    {
+      "epoch": 0.10184,
+      "grad_norm": 1.1178353224836102,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 10184
+    },
+    {
+      "epoch": 0.10185,
+      "grad_norm": 1.1689044556545598,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 10185
+    },
+    {
+      "epoch": 0.10186,
+      "grad_norm": 1.419581610460463,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 10186
+    },
+    {
+      "epoch": 0.10187,
+      "grad_norm": 1.1733245395221399,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 10187
+    },
+    {
+      "epoch": 0.10188,
+      "grad_norm": 0.9580355128633431,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 10188
+    },
+    {
+      "epoch": 0.10189,
+      "grad_norm": 1.1722566739996738,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 10189
+    },
+    {
+      "epoch": 0.1019,
+      "grad_norm": 1.1612042034180012,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 10190
+    },
+    {
+      "epoch": 0.10191,
+      "grad_norm": 1.195329589772822,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 10191
+    },
+    {
+      "epoch": 0.10192,
+      "grad_norm": 1.0846595164302155,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 10192
+    },
+    {
+      "epoch": 0.10193,
+      "grad_norm": 1.1488860249777917,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 10193
+    },
+    {
+      "epoch": 0.10194,
+      "grad_norm": 1.1895044545007407,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 10194
+    },
+    {
+      "epoch": 0.10195,
+      "grad_norm": 0.9769595430999226,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 10195
+    },
+    {
+      "epoch": 0.10196,
+      "grad_norm": 1.1017689182766344,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 10196
+    },
+    {
+      "epoch": 0.10197,
+      "grad_norm": 1.0793074132780305,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 10197
+    },
+    {
+      "epoch": 0.10198,
+      "grad_norm": 1.0506607464798967,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 10198
+    },
+    {
+      "epoch": 0.10199,
+      "grad_norm": 1.1837831773812761,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 10199
+    },
+    {
+      "epoch": 0.102,
+      "grad_norm": 1.076073379069326,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 10200
+    },
+    {
+      "epoch": 0.10201,
+      "grad_norm": 1.0825848802681943,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 10201
+    },
+    {
+      "epoch": 0.10202,
+      "grad_norm": 1.3241527514135416,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 10202
+    },
+    {
+      "epoch": 0.10203,
+      "grad_norm": 1.1724175197204518,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 10203
+    },
+    {
+      "epoch": 0.10204,
+      "grad_norm": 1.3185840871244239,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 10204
+    },
+    {
+      "epoch": 0.10205,
+      "grad_norm": 0.9734256552070326,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 10205
+    },
+    {
+      "epoch": 0.10206,
+      "grad_norm": 1.1022386726905078,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 10206
+    },
+    {
+      "epoch": 0.10207,
+      "grad_norm": 1.173133415900628,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 10207
+    },
+    {
+      "epoch": 0.10208,
+      "grad_norm": 1.1305215932951866,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 10208
+    },
+    {
+      "epoch": 0.10209,
+      "grad_norm": 1.190358205023772,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 10209
+    },
+    {
+      "epoch": 0.1021,
+      "grad_norm": 1.065896145776302,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 10210
+    },
+    {
+      "epoch": 0.10211,
+      "grad_norm": 1.3102703273182168,
+      "learning_rate": 0.003,
+      "loss": 4.0593,
+      "step": 10211
+    },
+    {
+      "epoch": 0.10212,
+      "grad_norm": 0.9660207653575211,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 10212
+    },
+    {
+      "epoch": 0.10213,
+      "grad_norm": 0.9543652414333238,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 10213
+    },
+    {
+      "epoch": 0.10214,
+      "grad_norm": 1.036805427205904,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 10214
+    },
+    {
+      "epoch": 0.10215,
+      "grad_norm": 1.057033327613356,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 10215
+    },
+    {
+      "epoch": 0.10216,
+      "grad_norm": 1.134350884528531,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 10216
+    },
+    {
+      "epoch": 0.10217,
+      "grad_norm": 1.1648146450494354,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 10217
+    },
+    {
+      "epoch": 0.10218,
+      "grad_norm": 1.4506285684005091,
+      "learning_rate": 0.003,
+      "loss": 4.0673,
+      "step": 10218
+    },
+    {
+      "epoch": 0.10219,
+      "grad_norm": 0.9222728969038609,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 10219
+    },
+    {
+      "epoch": 0.1022,
+      "grad_norm": 1.0654406081445127,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 10220
+    },
+    {
+      "epoch": 0.10221,
+      "grad_norm": 1.252155041126181,
+      "learning_rate": 0.003,
+      "loss": 4.0536,
+      "step": 10221
+    },
+    {
+      "epoch": 0.10222,
+      "grad_norm": 1.206778847995836,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 10222
+    },
+    {
+      "epoch": 0.10223,
+      "grad_norm": 1.2444900182503111,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 10223
+    },
+    {
+      "epoch": 0.10224,
+      "grad_norm": 1.040598030326006,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 10224
+    },
+    {
+      "epoch": 0.10225,
+      "grad_norm": 1.4878752264835577,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 10225
+    },
+    {
+      "epoch": 0.10226,
+      "grad_norm": 0.9402450092696958,
+      "learning_rate": 0.003,
+      "loss": 4.0818,
+      "step": 10226
+    },
+    {
+      "epoch": 0.10227,
+      "grad_norm": 1.2884407987069295,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 10227
+    },
+    {
+      "epoch": 0.10228,
+      "grad_norm": 1.1522519965955926,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 10228
+    },
+    {
+      "epoch": 0.10229,
+      "grad_norm": 1.0468946409066122,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 10229
+    },
+    {
+      "epoch": 0.1023,
+      "grad_norm": 1.2964574881601119,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 10230
+    },
+    {
+      "epoch": 0.10231,
+      "grad_norm": 1.24484224526044,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 10231
+    },
+    {
+      "epoch": 0.10232,
+      "grad_norm": 1.0853521484203787,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 10232
+    },
+    {
+      "epoch": 0.10233,
+      "grad_norm": 1.065069643441235,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 10233
+    },
+    {
+      "epoch": 0.10234,
+      "grad_norm": 1.1844230860579215,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 10234
+    },
+    {
+      "epoch": 0.10235,
+      "grad_norm": 1.1858854955074398,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 10235
+    },
+    {
+      "epoch": 0.10236,
+      "grad_norm": 1.155823762579354,
+      "learning_rate": 0.003,
+      "loss": 4.0689,
+      "step": 10236
+    },
+    {
+      "epoch": 0.10237,
+      "grad_norm": 1.259243563598782,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 10237
+    },
+    {
+      "epoch": 0.10238,
+      "grad_norm": 1.021888249686767,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 10238
+    },
+    {
+      "epoch": 0.10239,
+      "grad_norm": 1.2726229230148722,
+      "learning_rate": 0.003,
+      "loss": 4.0704,
+      "step": 10239
+    },
+    {
+      "epoch": 0.1024,
+      "grad_norm": 0.8686221007981468,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 10240
+    },
+    {
+      "epoch": 0.10241,
+      "grad_norm": 1.0195922987283028,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 10241
+    },
+    {
+      "epoch": 0.10242,
+      "grad_norm": 1.4977495826980316,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 10242
+    },
+    {
+      "epoch": 0.10243,
+      "grad_norm": 0.8331453442599753,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 10243
+    },
+    {
+      "epoch": 0.10244,
+      "grad_norm": 0.9904099892605175,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 10244
+    },
+    {
+      "epoch": 0.10245,
+      "grad_norm": 1.1062252978239564,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 10245
+    },
+    {
+      "epoch": 0.10246,
+      "grad_norm": 1.2991428997983552,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 10246
+    },
+    {
+      "epoch": 0.10247,
+      "grad_norm": 1.017899165985093,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 10247
+    },
+    {
+      "epoch": 0.10248,
+      "grad_norm": 1.1832127454060877,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 10248
+    },
+    {
+      "epoch": 0.10249,
+      "grad_norm": 1.1079456008025137,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 10249
+    },
+    {
+      "epoch": 0.1025,
+      "grad_norm": 1.3362106747505438,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 10250
+    },
+    {
+      "epoch": 0.10251,
+      "grad_norm": 1.1117749161341806,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 10251
+    },
+    {
+      "epoch": 0.10252,
+      "grad_norm": 1.2698367881846497,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 10252
+    },
+    {
+      "epoch": 0.10253,
+      "grad_norm": 1.149957054689331,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 10253
+    },
+    {
+      "epoch": 0.10254,
+      "grad_norm": 0.9917763597068653,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 10254
+    },
+    {
+      "epoch": 0.10255,
+      "grad_norm": 1.1692203345741474,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 10255
+    },
+    {
+      "epoch": 0.10256,
+      "grad_norm": 1.084143742321343,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 10256
+    },
+    {
+      "epoch": 0.10257,
+      "grad_norm": 1.2380367761444084,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 10257
+    },
+    {
+      "epoch": 0.10258,
+      "grad_norm": 1.1407215637889365,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 10258
+    },
+    {
+      "epoch": 0.10259,
+      "grad_norm": 1.1517658724662878,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 10259
+    },
+    {
+      "epoch": 0.1026,
+      "grad_norm": 1.025390717700848,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 10260
+    },
+    {
+      "epoch": 0.10261,
+      "grad_norm": 1.2024437713037104,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 10261
+    },
+    {
+      "epoch": 0.10262,
+      "grad_norm": 1.0958713174379848,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 10262
+    },
+    {
+      "epoch": 0.10263,
+      "grad_norm": 0.8765257399245001,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 10263
+    },
+    {
+      "epoch": 0.10264,
+      "grad_norm": 0.9633993597005445,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 10264
+    },
+    {
+      "epoch": 0.10265,
+      "grad_norm": 1.35821204523184,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 10265
+    },
+    {
+      "epoch": 0.10266,
+      "grad_norm": 1.0219625008871738,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 10266
+    },
+    {
+      "epoch": 0.10267,
+      "grad_norm": 1.2227874326162127,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 10267
+    },
+    {
+      "epoch": 0.10268,
+      "grad_norm": 0.978586011050389,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 10268
+    },
+    {
+      "epoch": 0.10269,
+      "grad_norm": 1.3366116601016778,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 10269
+    },
+    {
+      "epoch": 0.1027,
+      "grad_norm": 1.203267489591858,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 10270
+    },
+    {
+      "epoch": 0.10271,
+      "grad_norm": 1.098001417832544,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 10271
+    },
+    {
+      "epoch": 0.10272,
+      "grad_norm": 1.0307246792918792,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 10272
+    },
+    {
+      "epoch": 0.10273,
+      "grad_norm": 1.3053322880296834,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 10273
+    },
+    {
+      "epoch": 0.10274,
+      "grad_norm": 1.1385323131916913,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 10274
+    },
+    {
+      "epoch": 0.10275,
+      "grad_norm": 1.1261509616015806,
+      "learning_rate": 0.003,
+      "loss": 4.0538,
+      "step": 10275
+    },
+    {
+      "epoch": 0.10276,
+      "grad_norm": 1.2868774897178286,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 10276
+    },
+    {
+      "epoch": 0.10277,
+      "grad_norm": 1.0360335060829537,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 10277
+    },
+    {
+      "epoch": 0.10278,
+      "grad_norm": 1.051397255526473,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 10278
+    },
+    {
+      "epoch": 0.10279,
+      "grad_norm": 1.1689181356787075,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 10279
+    },
+    {
+      "epoch": 0.1028,
+      "grad_norm": 1.0026627281522649,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 10280
+    },
+    {
+      "epoch": 0.10281,
+      "grad_norm": 1.2879919756618505,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 10281
+    },
+    {
+      "epoch": 0.10282,
+      "grad_norm": 1.244730230167987,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 10282
+    },
+    {
+      "epoch": 0.10283,
+      "grad_norm": 1.3076934827367117,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 10283
+    },
+    {
+      "epoch": 0.10284,
+      "grad_norm": 1.1046390773617638,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 10284
+    },
+    {
+      "epoch": 0.10285,
+      "grad_norm": 1.4413050093320747,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 10285
+    },
+    {
+      "epoch": 0.10286,
+      "grad_norm": 0.8441280250112908,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 10286
+    },
+    {
+      "epoch": 0.10287,
+      "grad_norm": 1.1644552941443256,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 10287
+    },
+    {
+      "epoch": 0.10288,
+      "grad_norm": 1.253613939194171,
+      "learning_rate": 0.003,
+      "loss": 4.071,
+      "step": 10288
+    },
+    {
+      "epoch": 0.10289,
+      "grad_norm": 1.1992464347629659,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 10289
+    },
+    {
+      "epoch": 0.1029,
+      "grad_norm": 1.1614080288077706,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 10290
+    },
+    {
+      "epoch": 0.10291,
+      "grad_norm": 1.1200552133267128,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 10291
+    },
+    {
+      "epoch": 0.10292,
+      "grad_norm": 1.1209107524052186,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 10292
+    },
+    {
+      "epoch": 0.10293,
+      "grad_norm": 1.1739986737744281,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 10293
+    },
+    {
+      "epoch": 0.10294,
+      "grad_norm": 0.9094006847879885,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 10294
+    },
+    {
+      "epoch": 0.10295,
+      "grad_norm": 1.0626685311138218,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 10295
+    },
+    {
+      "epoch": 0.10296,
+      "grad_norm": 1.3323240753459744,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 10296
+    },
+    {
+      "epoch": 0.10297,
+      "grad_norm": 0.9873549849936493,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 10297
+    },
+    {
+      "epoch": 0.10298,
+      "grad_norm": 1.1100743664279358,
+      "learning_rate": 0.003,
+      "loss": 4.0647,
+      "step": 10298
+    },
+    {
+      "epoch": 0.10299,
+      "grad_norm": 1.2118577543293596,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 10299
+    },
+    {
+      "epoch": 0.103,
+      "grad_norm": 1.1347265113461813,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 10300
+    },
+    {
+      "epoch": 0.10301,
+      "grad_norm": 1.0447143445167029,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 10301
+    },
+    {
+      "epoch": 0.10302,
+      "grad_norm": 1.051062325381112,
+      "learning_rate": 0.003,
+      "loss": 4.0564,
+      "step": 10302
+    },
+    {
+      "epoch": 0.10303,
+      "grad_norm": 1.0323975069194364,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 10303
+    },
+    {
+      "epoch": 0.10304,
+      "grad_norm": 1.2789211780935934,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 10304
+    },
+    {
+      "epoch": 0.10305,
+      "grad_norm": 1.005336751800442,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 10305
+    },
+    {
+      "epoch": 0.10306,
+      "grad_norm": 1.1730045793816195,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 10306
+    },
+    {
+      "epoch": 0.10307,
+      "grad_norm": 1.1183555686080375,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 10307
+    },
+    {
+      "epoch": 0.10308,
+      "grad_norm": 1.0617744506344082,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 10308
+    },
+    {
+      "epoch": 0.10309,
+      "grad_norm": 1.22078736450434,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 10309
+    },
+    {
+      "epoch": 0.1031,
+      "grad_norm": 0.9980699183199506,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 10310
+    },
+    {
+      "epoch": 0.10311,
+      "grad_norm": 1.2462656361373519,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 10311
+    },
+    {
+      "epoch": 0.10312,
+      "grad_norm": 1.0351182027362364,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 10312
+    },
+    {
+      "epoch": 0.10313,
+      "grad_norm": 1.287680378354591,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 10313
+    },
+    {
+      "epoch": 0.10314,
+      "grad_norm": 0.9076021123086481,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 10314
+    },
+    {
+      "epoch": 0.10315,
+      "grad_norm": 1.1481668973991306,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 10315
+    },
+    {
+      "epoch": 0.10316,
+      "grad_norm": 1.0780477730461324,
+      "learning_rate": 0.003,
+      "loss": 4.0693,
+      "step": 10316
+    },
+    {
+      "epoch": 0.10317,
+      "grad_norm": 1.1892911855004693,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 10317
+    },
+    {
+      "epoch": 0.10318,
+      "grad_norm": 1.0047821885404769,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 10318
+    },
+    {
+      "epoch": 0.10319,
+      "grad_norm": 1.28222936870825,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 10319
+    },
+    {
+      "epoch": 0.1032,
+      "grad_norm": 1.1296157925908548,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 10320
+    },
+    {
+      "epoch": 0.10321,
+      "grad_norm": 1.4853389937950883,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 10321
+    },
+    {
+      "epoch": 0.10322,
+      "grad_norm": 0.9787569170059056,
+      "learning_rate": 0.003,
+      "loss": 4.0621,
+      "step": 10322
+    },
+    {
+      "epoch": 0.10323,
+      "grad_norm": 1.0906735449814466,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 10323
+    },
+    {
+      "epoch": 0.10324,
+      "grad_norm": 1.068837791641826,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 10324
+    },
+    {
+      "epoch": 0.10325,
+      "grad_norm": 1.2037337762025848,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 10325
+    },
+    {
+      "epoch": 0.10326,
+      "grad_norm": 1.1133172543387875,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 10326
+    },
+    {
+      "epoch": 0.10327,
+      "grad_norm": 1.3167960269468966,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 10327
+    },
+    {
+      "epoch": 0.10328,
+      "grad_norm": 1.0696207926347543,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 10328
+    },
+    {
+      "epoch": 0.10329,
+      "grad_norm": 1.245399604421406,
+      "learning_rate": 0.003,
+      "loss": 4.0544,
+      "step": 10329
+    },
+    {
+      "epoch": 0.1033,
+      "grad_norm": 1.0059697543492918,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 10330
+    },
+    {
+      "epoch": 0.10331,
+      "grad_norm": 1.5656251418654314,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 10331
+    },
+    {
+      "epoch": 0.10332,
+      "grad_norm": 0.8574115626605384,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 10332
+    },
+    {
+      "epoch": 0.10333,
+      "grad_norm": 0.9859350180122279,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 10333
+    },
+    {
+      "epoch": 0.10334,
+      "grad_norm": 1.2820418030806104,
+      "learning_rate": 0.003,
+      "loss": 4.0694,
+      "step": 10334
+    },
+    {
+      "epoch": 0.10335,
+      "grad_norm": 0.8731451613756506,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 10335
+    },
+    {
+      "epoch": 0.10336,
+      "grad_norm": 1.014757675534356,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 10336
+    },
+    {
+      "epoch": 0.10337,
+      "grad_norm": 1.4909413920937094,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 10337
+    },
+    {
+      "epoch": 0.10338,
+      "grad_norm": 0.7816794866102409,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 10338
+    },
+    {
+      "epoch": 0.10339,
+      "grad_norm": 0.9243690877719841,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 10339
+    },
+    {
+      "epoch": 0.1034,
+      "grad_norm": 1.14694380987657,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 10340
+    },
+    {
+      "epoch": 0.10341,
+      "grad_norm": 1.3834936067183423,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 10341
+    },
+    {
+      "epoch": 0.10342,
+      "grad_norm": 1.1154396048927036,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 10342
+    },
+    {
+      "epoch": 0.10343,
+      "grad_norm": 1.1273639336085786,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 10343
+    },
+    {
+      "epoch": 0.10344,
+      "grad_norm": 1.1608493429486209,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 10344
+    },
+    {
+      "epoch": 0.10345,
+      "grad_norm": 1.06990959346778,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 10345
+    },
+    {
+      "epoch": 0.10346,
+      "grad_norm": 1.2479747749226988,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 10346
+    },
+    {
+      "epoch": 0.10347,
+      "grad_norm": 0.9737474653691929,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 10347
+    },
+    {
+      "epoch": 0.10348,
+      "grad_norm": 1.1819821090166085,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 10348
+    },
+    {
+      "epoch": 0.10349,
+      "grad_norm": 1.167019349942472,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 10349
+    },
+    {
+      "epoch": 0.1035,
+      "grad_norm": 1.3519029606445285,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 10350
+    },
+    {
+      "epoch": 0.10351,
+      "grad_norm": 0.8593715530349866,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 10351
+    },
+    {
+      "epoch": 0.10352,
+      "grad_norm": 0.9127235399409747,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 10352
+    },
+    {
+      "epoch": 0.10353,
+      "grad_norm": 1.1579963956603465,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 10353
+    },
+    {
+      "epoch": 0.10354,
+      "grad_norm": 1.2797240045316958,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 10354
+    },
+    {
+      "epoch": 0.10355,
+      "grad_norm": 0.9612463917392908,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 10355
+    },
+    {
+      "epoch": 0.10356,
+      "grad_norm": 1.1043787877283193,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 10356
+    },
+    {
+      "epoch": 0.10357,
+      "grad_norm": 1.121926767893712,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 10357
+    },
+    {
+      "epoch": 0.10358,
+      "grad_norm": 1.0575829184316827,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 10358
+    },
+    {
+      "epoch": 0.10359,
+      "grad_norm": 1.1712412204356015,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 10359
+    },
+    {
+      "epoch": 0.1036,
+      "grad_norm": 1.1280743551856558,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 10360
+    },
+    {
+      "epoch": 0.10361,
+      "grad_norm": 1.100752064232454,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 10361
+    },
+    {
+      "epoch": 0.10362,
+      "grad_norm": 1.3611223839175068,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 10362
+    },
+    {
+      "epoch": 0.10363,
+      "grad_norm": 0.9602281325752356,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 10363
+    },
+    {
+      "epoch": 0.10364,
+      "grad_norm": 1.2553483424870702,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 10364
+    },
+    {
+      "epoch": 0.10365,
+      "grad_norm": 1.4024905356997521,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 10365
+    },
+    {
+      "epoch": 0.10366,
+      "grad_norm": 0.9242656529205991,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 10366
+    },
+    {
+      "epoch": 0.10367,
+      "grad_norm": 1.1604823265405697,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 10367
+    },
+    {
+      "epoch": 0.10368,
+      "grad_norm": 1.1969970485888448,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 10368
+    },
+    {
+      "epoch": 0.10369,
+      "grad_norm": 1.0200300949712449,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 10369
+    },
+    {
+      "epoch": 0.1037,
+      "grad_norm": 1.2689639441368583,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 10370
+    },
+    {
+      "epoch": 0.10371,
+      "grad_norm": 1.0341059906639019,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 10371
+    },
+    {
+      "epoch": 0.10372,
+      "grad_norm": 1.2538857040073335,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 10372
+    },
+    {
+      "epoch": 0.10373,
+      "grad_norm": 1.1201024616892967,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 10373
+    },
+    {
+      "epoch": 0.10374,
+      "grad_norm": 1.2950845025933793,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 10374
+    },
+    {
+      "epoch": 0.10375,
+      "grad_norm": 0.9699551875266422,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 10375
+    },
+    {
+      "epoch": 0.10376,
+      "grad_norm": 1.1580083785809567,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 10376
+    },
+    {
+      "epoch": 0.10377,
+      "grad_norm": 1.1093131687693019,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 10377
+    },
+    {
+      "epoch": 0.10378,
+      "grad_norm": 1.2624596205999834,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 10378
+    },
+    {
+      "epoch": 0.10379,
+      "grad_norm": 1.0914527517869759,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 10379
+    },
+    {
+      "epoch": 0.1038,
+      "grad_norm": 1.151305634171054,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 10380
+    },
+    {
+      "epoch": 0.10381,
+      "grad_norm": 1.18506187805289,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 10381
+    },
+    {
+      "epoch": 0.10382,
+      "grad_norm": 0.960814714372655,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 10382
+    },
+    {
+      "epoch": 0.10383,
+      "grad_norm": 1.1274336145652182,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 10383
+    },
+    {
+      "epoch": 0.10384,
+      "grad_norm": 1.0778718704979358,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 10384
+    },
+    {
+      "epoch": 0.10385,
+      "grad_norm": 1.3346207537542287,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 10385
+    },
+    {
+      "epoch": 0.10386,
+      "grad_norm": 1.1729682327745514,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 10386
+    },
+    {
+      "epoch": 0.10387,
+      "grad_norm": 1.036661733873201,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 10387
+    },
+    {
+      "epoch": 0.10388,
+      "grad_norm": 1.1267714195794842,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 10388
+    },
+    {
+      "epoch": 0.10389,
+      "grad_norm": 1.070601905354444,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 10389
+    },
+    {
+      "epoch": 0.1039,
+      "grad_norm": 1.1108942610835553,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 10390
+    },
+    {
+      "epoch": 0.10391,
+      "grad_norm": 1.0766563550369346,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 10391
+    },
+    {
+      "epoch": 0.10392,
+      "grad_norm": 1.2926774985748823,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 10392
+    },
+    {
+      "epoch": 0.10393,
+      "grad_norm": 1.021832754641526,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 10393
+    },
+    {
+      "epoch": 0.10394,
+      "grad_norm": 1.2672584449062123,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 10394
+    },
+    {
+      "epoch": 0.10395,
+      "grad_norm": 0.9905815246662731,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 10395
+    },
+    {
+      "epoch": 0.10396,
+      "grad_norm": 1.0082239163019409,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 10396
+    },
+    {
+      "epoch": 0.10397,
+      "grad_norm": 1.2087702334917223,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 10397
+    },
+    {
+      "epoch": 0.10398,
+      "grad_norm": 1.3670646129935469,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 10398
+    },
+    {
+      "epoch": 0.10399,
+      "grad_norm": 1.0071185422959221,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 10399
+    },
+    {
+      "epoch": 0.104,
+      "grad_norm": 1.201312509023756,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 10400
+    },
+    {
+      "epoch": 0.10401,
+      "grad_norm": 0.9420904682898148,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 10401
+    },
+    {
+      "epoch": 0.10402,
+      "grad_norm": 1.2050493388965062,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 10402
+    },
+    {
+      "epoch": 0.10403,
+      "grad_norm": 1.046563241466676,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 10403
+    },
+    {
+      "epoch": 0.10404,
+      "grad_norm": 1.1628697809891466,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 10404
+    },
+    {
+      "epoch": 0.10405,
+      "grad_norm": 1.3199812512263778,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 10405
+    },
+    {
+      "epoch": 0.10406,
+      "grad_norm": 1.0594480118801703,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 10406
+    },
+    {
+      "epoch": 0.10407,
+      "grad_norm": 1.1233067919692772,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 10407
+    },
+    {
+      "epoch": 0.10408,
+      "grad_norm": 0.9910896481437917,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 10408
+    },
+    {
+      "epoch": 0.10409,
+      "grad_norm": 1.138918453922149,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 10409
+    },
+    {
+      "epoch": 0.1041,
+      "grad_norm": 1.1821655962343458,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 10410
+    },
+    {
+      "epoch": 0.10411,
+      "grad_norm": 1.214060735642106,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 10411
+    },
+    {
+      "epoch": 0.10412,
+      "grad_norm": 1.1271952204136202,
+      "learning_rate": 0.003,
+      "loss": 3.9872,
+      "step": 10412
+    },
+    {
+      "epoch": 0.10413,
+      "grad_norm": 0.9720082652463884,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 10413
+    },
+    {
+      "epoch": 0.10414,
+      "grad_norm": 1.1999444084371422,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 10414
+    },
+    {
+      "epoch": 0.10415,
+      "grad_norm": 1.073604246138116,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 10415
+    },
+    {
+      "epoch": 0.10416,
+      "grad_norm": 1.108459057982964,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 10416
+    },
+    {
+      "epoch": 0.10417,
+      "grad_norm": 1.271581283838925,
+      "learning_rate": 0.003,
+      "loss": 4.0579,
+      "step": 10417
+    },
+    {
+      "epoch": 0.10418,
+      "grad_norm": 1.079645522515878,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 10418
+    },
+    {
+      "epoch": 0.10419,
+      "grad_norm": 1.1152248774580158,
+      "learning_rate": 0.003,
+      "loss": 4.0746,
+      "step": 10419
+    },
+    {
+      "epoch": 0.1042,
+      "grad_norm": 1.3480810386551398,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 10420
+    },
+    {
+      "epoch": 0.10421,
+      "grad_norm": 0.9290743153390996,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 10421
+    },
+    {
+      "epoch": 0.10422,
+      "grad_norm": 1.0777250714984647,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 10422
+    },
+    {
+      "epoch": 0.10423,
+      "grad_norm": 1.2450132081404977,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 10423
+    },
+    {
+      "epoch": 0.10424,
+      "grad_norm": 0.9622549883886152,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 10424
+    },
+    {
+      "epoch": 0.10425,
+      "grad_norm": 1.1529396436416368,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 10425
+    },
+    {
+      "epoch": 0.10426,
+      "grad_norm": 1.0658110282776228,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 10426
+    },
+    {
+      "epoch": 0.10427,
+      "grad_norm": 1.0117761795322662,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 10427
+    },
+    {
+      "epoch": 0.10428,
+      "grad_norm": 1.1685995916063228,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 10428
+    },
+    {
+      "epoch": 0.10429,
+      "grad_norm": 1.0923828450042794,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 10429
+    },
+    {
+      "epoch": 0.1043,
+      "grad_norm": 1.1655387142481999,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 10430
+    },
+    {
+      "epoch": 0.10431,
+      "grad_norm": 1.3531912916034343,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 10431
+    },
+    {
+      "epoch": 0.10432,
+      "grad_norm": 0.9691209365956966,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 10432
+    },
+    {
+      "epoch": 0.10433,
+      "grad_norm": 1.0876767583765015,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 10433
+    },
+    {
+      "epoch": 0.10434,
+      "grad_norm": 1.0893989324457354,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 10434
+    },
+    {
+      "epoch": 0.10435,
+      "grad_norm": 1.1011090775263708,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 10435
+    },
+    {
+      "epoch": 0.10436,
+      "grad_norm": 1.2651035810376114,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 10436
+    },
+    {
+      "epoch": 0.10437,
+      "grad_norm": 1.077788583905916,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 10437
+    },
+    {
+      "epoch": 0.10438,
+      "grad_norm": 1.089658709482987,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 10438
+    },
+    {
+      "epoch": 0.10439,
+      "grad_norm": 1.3526434745275007,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 10439
+    },
+    {
+      "epoch": 0.1044,
+      "grad_norm": 1.072260827630628,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 10440
+    },
+    {
+      "epoch": 0.10441,
+      "grad_norm": 1.275665138867706,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 10441
+    },
+    {
+      "epoch": 0.10442,
+      "grad_norm": 0.8780574008022751,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 10442
+    },
+    {
+      "epoch": 0.10443,
+      "grad_norm": 1.0997214258568853,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 10443
+    },
+    {
+      "epoch": 0.10444,
+      "grad_norm": 1.3431512939323784,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 10444
+    },
+    {
+      "epoch": 0.10445,
+      "grad_norm": 1.2074116232555316,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 10445
+    },
+    {
+      "epoch": 0.10446,
+      "grad_norm": 1.087105337997175,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 10446
+    },
+    {
+      "epoch": 0.10447,
+      "grad_norm": 1.0707463528471768,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 10447
+    },
+    {
+      "epoch": 0.10448,
+      "grad_norm": 1.0692522472019002,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 10448
+    },
+    {
+      "epoch": 0.10449,
+      "grad_norm": 1.001943199192615,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 10449
+    },
+    {
+      "epoch": 0.1045,
+      "grad_norm": 1.259242230186131,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 10450
+    },
+    {
+      "epoch": 0.10451,
+      "grad_norm": 0.8956322006755586,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 10451
+    },
+    {
+      "epoch": 0.10452,
+      "grad_norm": 1.2342703982705026,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 10452
+    },
+    {
+      "epoch": 0.10453,
+      "grad_norm": 1.1659755217419676,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 10453
+    },
+    {
+      "epoch": 0.10454,
+      "grad_norm": 1.329730366250319,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 10454
+    },
+    {
+      "epoch": 0.10455,
+      "grad_norm": 1.0525902475820774,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 10455
+    },
+    {
+      "epoch": 0.10456,
+      "grad_norm": 1.4730676027395804,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 10456
+    },
+    {
+      "epoch": 0.10457,
+      "grad_norm": 0.902647065287986,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 10457
+    },
+    {
+      "epoch": 0.10458,
+      "grad_norm": 0.9366078692778497,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 10458
+    },
+    {
+      "epoch": 0.10459,
+      "grad_norm": 1.185490872765169,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 10459
+    },
+    {
+      "epoch": 0.1046,
+      "grad_norm": 1.220542856466773,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 10460
+    },
+    {
+      "epoch": 0.10461,
+      "grad_norm": 1.0971020768947664,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 10461
+    },
+    {
+      "epoch": 0.10462,
+      "grad_norm": 1.0948259417406494,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 10462
+    },
+    {
+      "epoch": 0.10463,
+      "grad_norm": 1.0964751192354854,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 10463
+    },
+    {
+      "epoch": 0.10464,
+      "grad_norm": 1.1441448993227044,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 10464
+    },
+    {
+      "epoch": 0.10465,
+      "grad_norm": 1.1473924677302787,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 10465
+    },
+    {
+      "epoch": 0.10466,
+      "grad_norm": 1.1495513149677226,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 10466
+    },
+    {
+      "epoch": 0.10467,
+      "grad_norm": 1.0544675114536122,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 10467
+    },
+    {
+      "epoch": 0.10468,
+      "grad_norm": 1.34298439731449,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 10468
+    },
+    {
+      "epoch": 0.10469,
+      "grad_norm": 1.2021399413747627,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 10469
+    },
+    {
+      "epoch": 0.1047,
+      "grad_norm": 1.1449386988739325,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 10470
+    },
+    {
+      "epoch": 0.10471,
+      "grad_norm": 1.0404658350920764,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 10471
+    },
+    {
+      "epoch": 0.10472,
+      "grad_norm": 1.2753272835439624,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 10472
+    },
+    {
+      "epoch": 0.10473,
+      "grad_norm": 1.1913855993707885,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 10473
+    },
+    {
+      "epoch": 0.10474,
+      "grad_norm": 1.1501830644015107,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 10474
+    },
+    {
+      "epoch": 0.10475,
+      "grad_norm": 1.243458316706119,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 10475
+    },
+    {
+      "epoch": 0.10476,
+      "grad_norm": 1.209956907576902,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 10476
+    },
+    {
+      "epoch": 0.10477,
+      "grad_norm": 1.0020350040143609,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 10477
+    },
+    {
+      "epoch": 0.10478,
+      "grad_norm": 1.0396708338378342,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 10478
+    },
+    {
+      "epoch": 0.10479,
+      "grad_norm": 1.1150470684477871,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 10479
+    },
+    {
+      "epoch": 0.1048,
+      "grad_norm": 1.122637069690026,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 10480
+    },
+    {
+      "epoch": 0.10481,
+      "grad_norm": 1.4318413431939903,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 10481
+    },
+    {
+      "epoch": 0.10482,
+      "grad_norm": 1.0791992375976056,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 10482
+    },
+    {
+      "epoch": 0.10483,
+      "grad_norm": 1.3065570899996903,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 10483
+    },
+    {
+      "epoch": 0.10484,
+      "grad_norm": 1.1874032550959215,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 10484
+    },
+    {
+      "epoch": 0.10485,
+      "grad_norm": 1.1893565695289274,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 10485
+    },
+    {
+      "epoch": 0.10486,
+      "grad_norm": 1.097831179509903,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 10486
+    },
+    {
+      "epoch": 0.10487,
+      "grad_norm": 1.2993291047275668,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 10487
+    },
+    {
+      "epoch": 0.10488,
+      "grad_norm": 0.9496980239335198,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 10488
+    },
+    {
+      "epoch": 0.10489,
+      "grad_norm": 1.0629884631995594,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 10489
+    },
+    {
+      "epoch": 0.1049,
+      "grad_norm": 1.0603268965374313,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 10490
+    },
+    {
+      "epoch": 0.10491,
+      "grad_norm": 1.208967187962485,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 10491
+    },
+    {
+      "epoch": 0.10492,
+      "grad_norm": 1.2629214617983036,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 10492
+    },
+    {
+      "epoch": 0.10493,
+      "grad_norm": 1.2124437867421798,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 10493
+    },
+    {
+      "epoch": 0.10494,
+      "grad_norm": 0.9449400933558595,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 10494
+    },
+    {
+      "epoch": 0.10495,
+      "grad_norm": 1.1234515875951618,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 10495
+    },
+    {
+      "epoch": 0.10496,
+      "grad_norm": 1.0139506348014824,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 10496
+    },
+    {
+      "epoch": 0.10497,
+      "grad_norm": 1.3582522269851591,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 10497
+    },
+    {
+      "epoch": 0.10498,
+      "grad_norm": 0.8485161156664304,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 10498
+    },
+    {
+      "epoch": 0.10499,
+      "grad_norm": 0.9682827176949959,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 10499
+    },
+    {
+      "epoch": 0.105,
+      "grad_norm": 1.3767621333474114,
+      "learning_rate": 0.003,
+      "loss": 4.0639,
+      "step": 10500
+    },
+    {
+      "epoch": 0.10501,
+      "grad_norm": 0.9911284707155722,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 10501
+    },
+    {
+      "epoch": 0.10502,
+      "grad_norm": 1.2098800890232249,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 10502
+    },
+    {
+      "epoch": 0.10503,
+      "grad_norm": 1.125482625749235,
+      "learning_rate": 0.003,
+      "loss": 4.0644,
+      "step": 10503
+    },
+    {
+      "epoch": 0.10504,
+      "grad_norm": 1.049702069475873,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 10504
+    },
+    {
+      "epoch": 0.10505,
+      "grad_norm": 1.2213586640000345,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 10505
+    },
+    {
+      "epoch": 0.10506,
+      "grad_norm": 1.315035994618202,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 10506
+    },
+    {
+      "epoch": 0.10507,
+      "grad_norm": 1.340531389565643,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 10507
+    },
+    {
+      "epoch": 0.10508,
+      "grad_norm": 0.9990031849773587,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 10508
+    },
+    {
+      "epoch": 0.10509,
+      "grad_norm": 1.1660130233668518,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 10509
+    },
+    {
+      "epoch": 0.1051,
+      "grad_norm": 1.039379676526276,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 10510
+    },
+    {
+      "epoch": 0.10511,
+      "grad_norm": 1.338586383397336,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 10511
+    },
+    {
+      "epoch": 0.10512,
+      "grad_norm": 1.0249548531611892,
+      "learning_rate": 0.003,
+      "loss": 4.0575,
+      "step": 10512
+    },
+    {
+      "epoch": 0.10513,
+      "grad_norm": 1.2470715536322723,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 10513
+    },
+    {
+      "epoch": 0.10514,
+      "grad_norm": 0.9573916157628471,
+      "learning_rate": 0.003,
+      "loss": 3.9872,
+      "step": 10514
+    },
+    {
+      "epoch": 0.10515,
+      "grad_norm": 1.299107259749984,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 10515
+    },
+    {
+      "epoch": 0.10516,
+      "grad_norm": 1.01524445765738,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 10516
+    },
+    {
+      "epoch": 0.10517,
+      "grad_norm": 1.1149804894227142,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 10517
+    },
+    {
+      "epoch": 0.10518,
+      "grad_norm": 1.1890411136167975,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 10518
+    },
+    {
+      "epoch": 0.10519,
+      "grad_norm": 0.9364490204330106,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 10519
+    },
+    {
+      "epoch": 0.1052,
+      "grad_norm": 1.1387642541625675,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 10520
+    },
+    {
+      "epoch": 0.10521,
+      "grad_norm": 1.1839482076334311,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 10521
+    },
+    {
+      "epoch": 0.10522,
+      "grad_norm": 1.0014230334791343,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 10522
+    },
+    {
+      "epoch": 0.10523,
+      "grad_norm": 1.1543281363304134,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 10523
+    },
+    {
+      "epoch": 0.10524,
+      "grad_norm": 1.0058642661701724,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 10524
+    },
+    {
+      "epoch": 0.10525,
+      "grad_norm": 1.2600433039619774,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 10525
+    },
+    {
+      "epoch": 0.10526,
+      "grad_norm": 1.0491904580994538,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 10526
+    },
+    {
+      "epoch": 0.10527,
+      "grad_norm": 1.3835636637824966,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 10527
+    },
+    {
+      "epoch": 0.10528,
+      "grad_norm": 1.1190543265923238,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 10528
+    },
+    {
+      "epoch": 0.10529,
+      "grad_norm": 1.41063797874233,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 10529
+    },
+    {
+      "epoch": 0.1053,
+      "grad_norm": 1.0017135846243632,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 10530
+    },
+    {
+      "epoch": 0.10531,
+      "grad_norm": 1.1349481015715086,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 10531
+    },
+    {
+      "epoch": 0.10532,
+      "grad_norm": 1.0974147690879539,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 10532
+    },
+    {
+      "epoch": 0.10533,
+      "grad_norm": 1.1318583086940592,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 10533
+    },
+    {
+      "epoch": 0.10534,
+      "grad_norm": 0.9728943881000612,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 10534
+    },
+    {
+      "epoch": 0.10535,
+      "grad_norm": 1.4760229533414277,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 10535
+    },
+    {
+      "epoch": 0.10536,
+      "grad_norm": 1.0178312131559886,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 10536
+    },
+    {
+      "epoch": 0.10537,
+      "grad_norm": 1.326688326472958,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 10537
+    },
+    {
+      "epoch": 0.10538,
+      "grad_norm": 0.9540964220451233,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 10538
+    },
+    {
+      "epoch": 0.10539,
+      "grad_norm": 1.1867065524476788,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 10539
+    },
+    {
+      "epoch": 0.1054,
+      "grad_norm": 1.306268363150594,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 10540
+    },
+    {
+      "epoch": 0.10541,
+      "grad_norm": 1.1821939453890575,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 10541
+    },
+    {
+      "epoch": 0.10542,
+      "grad_norm": 1.340191153094188,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 10542
+    },
+    {
+      "epoch": 0.10543,
+      "grad_norm": 0.8866083438164943,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 10543
+    },
+    {
+      "epoch": 0.10544,
+      "grad_norm": 1.1034227189294226,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 10544
+    },
+    {
+      "epoch": 0.10545,
+      "grad_norm": 1.145569873679018,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 10545
+    },
+    {
+      "epoch": 0.10546,
+      "grad_norm": 1.2439976954614562,
+      "learning_rate": 0.003,
+      "loss": 4.067,
+      "step": 10546
+    },
+    {
+      "epoch": 0.10547,
+      "grad_norm": 0.9219452650458853,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 10547
+    },
+    {
+      "epoch": 0.10548,
+      "grad_norm": 0.9976599090984258,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 10548
+    },
+    {
+      "epoch": 0.10549,
+      "grad_norm": 1.2774703253681363,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 10549
+    },
+    {
+      "epoch": 0.1055,
+      "grad_norm": 1.1539905793697123,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 10550
+    },
+    {
+      "epoch": 0.10551,
+      "grad_norm": 1.135911995287088,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 10551
+    },
+    {
+      "epoch": 0.10552,
+      "grad_norm": 1.0527742384461596,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 10552
+    },
+    {
+      "epoch": 0.10553,
+      "grad_norm": 1.322002490054715,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 10553
+    },
+    {
+      "epoch": 0.10554,
+      "grad_norm": 1.0084031327815373,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 10554
+    },
+    {
+      "epoch": 0.10555,
+      "grad_norm": 1.0808986156972835,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 10555
+    },
+    {
+      "epoch": 0.10556,
+      "grad_norm": 1.1216797816863153,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 10556
+    },
+    {
+      "epoch": 0.10557,
+      "grad_norm": 1.1475055052985905,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 10557
+    },
+    {
+      "epoch": 0.10558,
+      "grad_norm": 1.1119323781963724,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 10558
+    },
+    {
+      "epoch": 0.10559,
+      "grad_norm": 1.2460835632404752,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 10559
+    },
+    {
+      "epoch": 0.1056,
+      "grad_norm": 1.2138261319567527,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 10560
+    },
+    {
+      "epoch": 0.10561,
+      "grad_norm": 1.0483615457288105,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 10561
+    },
+    {
+      "epoch": 0.10562,
+      "grad_norm": 1.0118320288891192,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 10562
+    },
+    {
+      "epoch": 0.10563,
+      "grad_norm": 1.3644503768047236,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 10563
+    },
+    {
+      "epoch": 0.10564,
+      "grad_norm": 0.8013831438598815,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 10564
+    },
+    {
+      "epoch": 0.10565,
+      "grad_norm": 0.9612754540930174,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 10565
+    },
+    {
+      "epoch": 0.10566,
+      "grad_norm": 1.2366075679735475,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 10566
+    },
+    {
+      "epoch": 0.10567,
+      "grad_norm": 1.155623176742999,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 10567
+    },
+    {
+      "epoch": 0.10568,
+      "grad_norm": 1.1220562684114699,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 10568
+    },
+    {
+      "epoch": 0.10569,
+      "grad_norm": 1.163422842918069,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 10569
+    },
+    {
+      "epoch": 0.1057,
+      "grad_norm": 1.1409717638103616,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 10570
+    },
+    {
+      "epoch": 0.10571,
+      "grad_norm": 0.9910760818265895,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 10571
+    },
+    {
+      "epoch": 0.10572,
+      "grad_norm": 1.1832943348904186,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 10572
+    },
+    {
+      "epoch": 0.10573,
+      "grad_norm": 1.0817165438920588,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 10573
+    },
+    {
+      "epoch": 0.10574,
+      "grad_norm": 1.2883743060276551,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 10574
+    },
+    {
+      "epoch": 0.10575,
+      "grad_norm": 0.8503051859413028,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 10575
+    },
+    {
+      "epoch": 0.10576,
+      "grad_norm": 1.0469525545832647,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 10576
+    },
+    {
+      "epoch": 0.10577,
+      "grad_norm": 1.509719903394313,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 10577
+    },
+    {
+      "epoch": 0.10578,
+      "grad_norm": 0.9933503276360827,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 10578
+    },
+    {
+      "epoch": 0.10579,
+      "grad_norm": 1.1546805042641897,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 10579
+    },
+    {
+      "epoch": 0.1058,
+      "grad_norm": 1.10717494330016,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 10580
+    },
+    {
+      "epoch": 0.10581,
+      "grad_norm": 1.0811061670677058,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 10581
+    },
+    {
+      "epoch": 0.10582,
+      "grad_norm": 1.1040005473249752,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 10582
+    },
+    {
+      "epoch": 0.10583,
+      "grad_norm": 1.2803656515646102,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 10583
+    },
+    {
+      "epoch": 0.10584,
+      "grad_norm": 1.3090939277354992,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 10584
+    },
+    {
+      "epoch": 0.10585,
+      "grad_norm": 1.0847880823254095,
+      "learning_rate": 0.003,
+      "loss": 4.0722,
+      "step": 10585
+    },
+    {
+      "epoch": 0.10586,
+      "grad_norm": 1.2200835861910564,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 10586
+    },
+    {
+      "epoch": 0.10587,
+      "grad_norm": 0.8932627861610137,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 10587
+    },
+    {
+      "epoch": 0.10588,
+      "grad_norm": 1.139469360617456,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 10588
+    },
+    {
+      "epoch": 0.10589,
+      "grad_norm": 1.4344930118024855,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 10589
+    },
+    {
+      "epoch": 0.1059,
+      "grad_norm": 1.1102643686509654,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 10590
+    },
+    {
+      "epoch": 0.10591,
+      "grad_norm": 1.1481517513413337,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 10591
+    },
+    {
+      "epoch": 0.10592,
+      "grad_norm": 1.122474591041532,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 10592
+    },
+    {
+      "epoch": 0.10593,
+      "grad_norm": 1.2016781924295667,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 10593
+    },
+    {
+      "epoch": 0.10594,
+      "grad_norm": 1.07376059125612,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 10594
+    },
+    {
+      "epoch": 0.10595,
+      "grad_norm": 1.2904669765461627,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 10595
+    },
+    {
+      "epoch": 0.10596,
+      "grad_norm": 0.8779132609347672,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 10596
+    },
+    {
+      "epoch": 0.10597,
+      "grad_norm": 1.003771536112303,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 10597
+    },
+    {
+      "epoch": 0.10598,
+      "grad_norm": 1.0707616836208425,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 10598
+    },
+    {
+      "epoch": 0.10599,
+      "grad_norm": 0.9955175174902907,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 10599
+    },
+    {
+      "epoch": 0.106,
+      "grad_norm": 1.3851273303582732,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 10600
+    },
+    {
+      "epoch": 0.10601,
+      "grad_norm": 0.8326152872121492,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 10601
+    },
+    {
+      "epoch": 0.10602,
+      "grad_norm": 0.9405825691994378,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 10602
+    },
+    {
+      "epoch": 0.10603,
+      "grad_norm": 1.175319134130477,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 10603
+    },
+    {
+      "epoch": 0.10604,
+      "grad_norm": 1.2400190358250294,
+      "learning_rate": 0.003,
+      "loss": 4.0657,
+      "step": 10604
+    },
+    {
+      "epoch": 0.10605,
+      "grad_norm": 1.3117115346590698,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 10605
+    },
+    {
+      "epoch": 0.10606,
+      "grad_norm": 0.8357581858102279,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 10606
+    },
+    {
+      "epoch": 0.10607,
+      "grad_norm": 1.2186083536406302,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 10607
+    },
+    {
+      "epoch": 0.10608,
+      "grad_norm": 1.3549499721362552,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 10608
+    },
+    {
+      "epoch": 0.10609,
+      "grad_norm": 1.0359508960606705,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 10609
+    },
+    {
+      "epoch": 0.1061,
+      "grad_norm": 1.234427345167838,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 10610
+    },
+    {
+      "epoch": 0.10611,
+      "grad_norm": 1.1049507794457696,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 10611
+    },
+    {
+      "epoch": 0.10612,
+      "grad_norm": 1.1588897238356,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 10612
+    },
+    {
+      "epoch": 0.10613,
+      "grad_norm": 1.0882437441177095,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 10613
+    },
+    {
+      "epoch": 0.10614,
+      "grad_norm": 1.1661564975883256,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 10614
+    },
+    {
+      "epoch": 0.10615,
+      "grad_norm": 0.9756194205234838,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 10615
+    },
+    {
+      "epoch": 0.10616,
+      "grad_norm": 1.2136123225863367,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 10616
+    },
+    {
+      "epoch": 0.10617,
+      "grad_norm": 1.004520655549312,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 10617
+    },
+    {
+      "epoch": 0.10618,
+      "grad_norm": 1.293437758220031,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 10618
+    },
+    {
+      "epoch": 0.10619,
+      "grad_norm": 1.0745903694460457,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 10619
+    },
+    {
+      "epoch": 0.1062,
+      "grad_norm": 1.349386028002116,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 10620
+    },
+    {
+      "epoch": 0.10621,
+      "grad_norm": 1.0691689780916256,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 10621
+    },
+    {
+      "epoch": 0.10622,
+      "grad_norm": 1.224772531790401,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 10622
+    },
+    {
+      "epoch": 0.10623,
+      "grad_norm": 1.153790588423269,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 10623
+    },
+    {
+      "epoch": 0.10624,
+      "grad_norm": 1.193236273945861,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 10624
+    },
+    {
+      "epoch": 0.10625,
+      "grad_norm": 1.0654745501906075,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 10625
+    },
+    {
+      "epoch": 0.10626,
+      "grad_norm": 1.3261426114898907,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 10626
+    },
+    {
+      "epoch": 0.10627,
+      "grad_norm": 0.8982241457991678,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 10627
+    },
+    {
+      "epoch": 0.10628,
+      "grad_norm": 1.0114549713020966,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 10628
+    },
+    {
+      "epoch": 0.10629,
+      "grad_norm": 1.1489582478278977,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 10629
+    },
+    {
+      "epoch": 0.1063,
+      "grad_norm": 1.1444061673753105,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 10630
+    },
+    {
+      "epoch": 0.10631,
+      "grad_norm": 1.1576074418215807,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 10631
+    },
+    {
+      "epoch": 0.10632,
+      "grad_norm": 1.3627959422433176,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 10632
+    },
+    {
+      "epoch": 0.10633,
+      "grad_norm": 1.06565267900167,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 10633
+    },
+    {
+      "epoch": 0.10634,
+      "grad_norm": 1.307799627360254,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 10634
+    },
+    {
+      "epoch": 0.10635,
+      "grad_norm": 0.865797003148197,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 10635
+    },
+    {
+      "epoch": 0.10636,
+      "grad_norm": 1.0276563707055373,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 10636
+    },
+    {
+      "epoch": 0.10637,
+      "grad_norm": 1.179328642984521,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 10637
+    },
+    {
+      "epoch": 0.10638,
+      "grad_norm": 1.1306397758583546,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 10638
+    },
+    {
+      "epoch": 0.10639,
+      "grad_norm": 1.1379536938728096,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 10639
+    },
+    {
+      "epoch": 0.1064,
+      "grad_norm": 1.0529351002129548,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 10640
+    },
+    {
+      "epoch": 0.10641,
+      "grad_norm": 1.4104967072137549,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 10641
+    },
+    {
+      "epoch": 0.10642,
+      "grad_norm": 0.9234934199965433,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 10642
+    },
+    {
+      "epoch": 0.10643,
+      "grad_norm": 1.060649329346942,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 10643
+    },
+    {
+      "epoch": 0.10644,
+      "grad_norm": 1.1653395641699265,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 10644
+    },
+    {
+      "epoch": 0.10645,
+      "grad_norm": 1.05447570499781,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 10645
+    },
+    {
+      "epoch": 0.10646,
+      "grad_norm": 1.0414075025866627,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 10646
+    },
+    {
+      "epoch": 0.10647,
+      "grad_norm": 1.1643043878647805,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 10647
+    },
+    {
+      "epoch": 0.10648,
+      "grad_norm": 1.0684722197241294,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 10648
+    },
+    {
+      "epoch": 0.10649,
+      "grad_norm": 1.3080634492785814,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 10649
+    },
+    {
+      "epoch": 0.1065,
+      "grad_norm": 1.2923711297653349,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 10650
+    },
+    {
+      "epoch": 0.10651,
+      "grad_norm": 1.1133144821639869,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 10651
+    },
+    {
+      "epoch": 0.10652,
+      "grad_norm": 1.1508639513581163,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 10652
+    },
+    {
+      "epoch": 0.10653,
+      "grad_norm": 1.1860732849874172,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 10653
+    },
+    {
+      "epoch": 0.10654,
+      "grad_norm": 1.0365296506149477,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 10654
+    },
+    {
+      "epoch": 0.10655,
+      "grad_norm": 0.9561188769757217,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 10655
+    },
+    {
+      "epoch": 0.10656,
+      "grad_norm": 1.0940765619925885,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 10656
+    },
+    {
+      "epoch": 0.10657,
+      "grad_norm": 1.3722387207935904,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 10657
+    },
+    {
+      "epoch": 0.10658,
+      "grad_norm": 1.1503504406176477,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 10658
+    },
+    {
+      "epoch": 0.10659,
+      "grad_norm": 0.966012228781889,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 10659
+    },
+    {
+      "epoch": 0.1066,
+      "grad_norm": 1.0144525965138471,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 10660
+    },
+    {
+      "epoch": 0.10661,
+      "grad_norm": 1.2051087661883464,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 10661
+    },
+    {
+      "epoch": 0.10662,
+      "grad_norm": 1.0764523304400608,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 10662
+    },
+    {
+      "epoch": 0.10663,
+      "grad_norm": 1.209131260208791,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 10663
+    },
+    {
+      "epoch": 0.10664,
+      "grad_norm": 1.0568957147582319,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 10664
+    },
+    {
+      "epoch": 0.10665,
+      "grad_norm": 1.3417015377120707,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 10665
+    },
+    {
+      "epoch": 0.10666,
+      "grad_norm": 0.9362281562951543,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 10666
+    },
+    {
+      "epoch": 0.10667,
+      "grad_norm": 1.1589003721865672,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 10667
+    },
+    {
+      "epoch": 0.10668,
+      "grad_norm": 1.2601287656447024,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 10668
+    },
+    {
+      "epoch": 0.10669,
+      "grad_norm": 1.061780696809955,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 10669
+    },
+    {
+      "epoch": 0.1067,
+      "grad_norm": 1.1903387328335544,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 10670
+    },
+    {
+      "epoch": 0.10671,
+      "grad_norm": 1.0301208869856568,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 10671
+    },
+    {
+      "epoch": 0.10672,
+      "grad_norm": 1.226842299928559,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 10672
+    },
+    {
+      "epoch": 0.10673,
+      "grad_norm": 1.1866348712385926,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 10673
+    },
+    {
+      "epoch": 0.10674,
+      "grad_norm": 1.0069430418737448,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 10674
+    },
+    {
+      "epoch": 0.10675,
+      "grad_norm": 1.2985241581711995,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 10675
+    },
+    {
+      "epoch": 0.10676,
+      "grad_norm": 1.0289286257118662,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 10676
+    },
+    {
+      "epoch": 0.10677,
+      "grad_norm": 1.1701123709616597,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 10677
+    },
+    {
+      "epoch": 0.10678,
+      "grad_norm": 1.1718815214063174,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 10678
+    },
+    {
+      "epoch": 0.10679,
+      "grad_norm": 1.2943084912685583,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 10679
+    },
+    {
+      "epoch": 0.1068,
+      "grad_norm": 1.1957684694159134,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 10680
+    },
+    {
+      "epoch": 0.10681,
+      "grad_norm": 1.0034264649008786,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 10681
+    },
+    {
+      "epoch": 0.10682,
+      "grad_norm": 1.2761049621546057,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 10682
+    },
+    {
+      "epoch": 0.10683,
+      "grad_norm": 1.0598055696545081,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 10683
+    },
+    {
+      "epoch": 0.10684,
+      "grad_norm": 1.218444770158993,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 10684
+    },
+    {
+      "epoch": 0.10685,
+      "grad_norm": 0.9258871795824707,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 10685
+    },
+    {
+      "epoch": 0.10686,
+      "grad_norm": 1.133386229580556,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 10686
+    },
+    {
+      "epoch": 0.10687,
+      "grad_norm": 1.2653062418463612,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 10687
+    },
+    {
+      "epoch": 0.10688,
+      "grad_norm": 1.184995544981333,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 10688
+    },
+    {
+      "epoch": 0.10689,
+      "grad_norm": 1.0320005457605674,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 10689
+    },
+    {
+      "epoch": 0.1069,
+      "grad_norm": 1.2038777888203986,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 10690
+    },
+    {
+      "epoch": 0.10691,
+      "grad_norm": 1.129676406607381,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 10691
+    },
+    {
+      "epoch": 0.10692,
+      "grad_norm": 1.3575951971898041,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 10692
+    },
+    {
+      "epoch": 0.10693,
+      "grad_norm": 1.1196525664794985,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 10693
+    },
+    {
+      "epoch": 0.10694,
+      "grad_norm": 1.215242402090735,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 10694
+    },
+    {
+      "epoch": 0.10695,
+      "grad_norm": 1.013327742595491,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 10695
+    },
+    {
+      "epoch": 0.10696,
+      "grad_norm": 1.095537976839923,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 10696
+    },
+    {
+      "epoch": 0.10697,
+      "grad_norm": 1.2129639377799921,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 10697
+    },
+    {
+      "epoch": 0.10698,
+      "grad_norm": 1.1276205907442978,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 10698
+    },
+    {
+      "epoch": 0.10699,
+      "grad_norm": 1.4057292020168137,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 10699
+    },
+    {
+      "epoch": 0.107,
+      "grad_norm": 1.0889453831761258,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 10700
+    },
+    {
+      "epoch": 0.10701,
+      "grad_norm": 1.2636981543117896,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 10701
+    },
+    {
+      "epoch": 0.10702,
+      "grad_norm": 1.0444226985618574,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 10702
+    },
+    {
+      "epoch": 0.10703,
+      "grad_norm": 1.2951500282214232,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 10703
+    },
+    {
+      "epoch": 0.10704,
+      "grad_norm": 1.0393505702059902,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 10704
+    },
+    {
+      "epoch": 0.10705,
+      "grad_norm": 1.1797188685102178,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 10705
+    },
+    {
+      "epoch": 0.10706,
+      "grad_norm": 1.1975777074687664,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 10706
+    },
+    {
+      "epoch": 0.10707,
+      "grad_norm": 1.194075879308646,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 10707
+    },
+    {
+      "epoch": 0.10708,
+      "grad_norm": 1.0102354176185093,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 10708
+    },
+    {
+      "epoch": 0.10709,
+      "grad_norm": 1.3131806649777147,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 10709
+    },
+    {
+      "epoch": 0.1071,
+      "grad_norm": 1.2373109858890716,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 10710
+    },
+    {
+      "epoch": 0.10711,
+      "grad_norm": 1.2003702638346156,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 10711
+    },
+    {
+      "epoch": 0.10712,
+      "grad_norm": 0.9983255919354295,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 10712
+    },
+    {
+      "epoch": 0.10713,
+      "grad_norm": 0.9877113113359729,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 10713
+    },
+    {
+      "epoch": 0.10714,
+      "grad_norm": 1.3460252903807608,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 10714
+    },
+    {
+      "epoch": 0.10715,
+      "grad_norm": 0.9965502362486884,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 10715
+    },
+    {
+      "epoch": 0.10716,
+      "grad_norm": 1.2725890037766499,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 10716
+    },
+    {
+      "epoch": 0.10717,
+      "grad_norm": 1.1113072976703275,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 10717
+    },
+    {
+      "epoch": 0.10718,
+      "grad_norm": 1.0100186651689251,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 10718
+    },
+    {
+      "epoch": 0.10719,
+      "grad_norm": 1.0640701601768137,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 10719
+    },
+    {
+      "epoch": 0.1072,
+      "grad_norm": 1.0593061211004513,
+      "learning_rate": 0.003,
+      "loss": 4.0667,
+      "step": 10720
+    },
+    {
+      "epoch": 0.10721,
+      "grad_norm": 1.3099431143363085,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 10721
+    },
+    {
+      "epoch": 0.10722,
+      "grad_norm": 0.9809915726075293,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 10722
+    },
+    {
+      "epoch": 0.10723,
+      "grad_norm": 1.3406390414189182,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 10723
+    },
+    {
+      "epoch": 0.10724,
+      "grad_norm": 1.1349839036395613,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 10724
+    },
+    {
+      "epoch": 0.10725,
+      "grad_norm": 1.1603735826420933,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 10725
+    },
+    {
+      "epoch": 0.10726,
+      "grad_norm": 1.1306634640881634,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 10726
+    },
+    {
+      "epoch": 0.10727,
+      "grad_norm": 1.0401524644138374,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 10727
+    },
+    {
+      "epoch": 0.10728,
+      "grad_norm": 1.2861473387032047,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 10728
+    },
+    {
+      "epoch": 0.10729,
+      "grad_norm": 0.8700171921619617,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 10729
+    },
+    {
+      "epoch": 0.1073,
+      "grad_norm": 1.1494268855327163,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 10730
+    },
+    {
+      "epoch": 0.10731,
+      "grad_norm": 1.0235158899515824,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 10731
+    },
+    {
+      "epoch": 0.10732,
+      "grad_norm": 1.3171688814462412,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 10732
+    },
+    {
+      "epoch": 0.10733,
+      "grad_norm": 1.1618346197242664,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 10733
+    },
+    {
+      "epoch": 0.10734,
+      "grad_norm": 1.299839347131802,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 10734
+    },
+    {
+      "epoch": 0.10735,
+      "grad_norm": 0.9596941109018636,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 10735
+    },
+    {
+      "epoch": 0.10736,
+      "grad_norm": 1.047297478533331,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 10736
+    },
+    {
+      "epoch": 0.10737,
+      "grad_norm": 1.0623586974481836,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 10737
+    },
+    {
+      "epoch": 0.10738,
+      "grad_norm": 0.9993600202006488,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 10738
+    },
+    {
+      "epoch": 0.10739,
+      "grad_norm": 1.254811328838207,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 10739
+    },
+    {
+      "epoch": 0.1074,
+      "grad_norm": 1.02443107388267,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 10740
+    },
+    {
+      "epoch": 0.10741,
+      "grad_norm": 1.053670941966742,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 10741
+    },
+    {
+      "epoch": 0.10742,
+      "grad_norm": 1.3054887606936714,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 10742
+    },
+    {
+      "epoch": 0.10743,
+      "grad_norm": 1.1815464442351478,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 10743
+    },
+    {
+      "epoch": 0.10744,
+      "grad_norm": 1.0439503530541596,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 10744
+    },
+    {
+      "epoch": 0.10745,
+      "grad_norm": 1.2910029674176995,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 10745
+    },
+    {
+      "epoch": 0.10746,
+      "grad_norm": 1.1171478726339876,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 10746
+    },
+    {
+      "epoch": 0.10747,
+      "grad_norm": 1.316025255296466,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 10747
+    },
+    {
+      "epoch": 0.10748,
+      "grad_norm": 1.0185714857385384,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 10748
+    },
+    {
+      "epoch": 0.10749,
+      "grad_norm": 1.2319455923474587,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 10749
+    },
+    {
+      "epoch": 0.1075,
+      "grad_norm": 1.0698239902071434,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 10750
+    },
+    {
+      "epoch": 0.10751,
+      "grad_norm": 1.1767304687464153,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 10751
+    },
+    {
+      "epoch": 0.10752,
+      "grad_norm": 1.200346526551157,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 10752
+    },
+    {
+      "epoch": 0.10753,
+      "grad_norm": 0.8004910788946202,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 10753
+    },
+    {
+      "epoch": 0.10754,
+      "grad_norm": 0.9299169462294767,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 10754
+    },
+    {
+      "epoch": 0.10755,
+      "grad_norm": 1.1496170197907911,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 10755
+    },
+    {
+      "epoch": 0.10756,
+      "grad_norm": 1.3269009046033973,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 10756
+    },
+    {
+      "epoch": 0.10757,
+      "grad_norm": 1.01836707032746,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 10757
+    },
+    {
+      "epoch": 0.10758,
+      "grad_norm": 1.4360512920311785,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 10758
+    },
+    {
+      "epoch": 0.10759,
+      "grad_norm": 0.8198014966019104,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 10759
+    },
+    {
+      "epoch": 0.1076,
+      "grad_norm": 0.9805123439382194,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 10760
+    },
+    {
+      "epoch": 0.10761,
+      "grad_norm": 1.2404719998449543,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 10761
+    },
+    {
+      "epoch": 0.10762,
+      "grad_norm": 1.100158666709582,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 10762
+    },
+    {
+      "epoch": 0.10763,
+      "grad_norm": 1.255637890490154,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 10763
+    },
+    {
+      "epoch": 0.10764,
+      "grad_norm": 1.0281623834755365,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 10764
+    },
+    {
+      "epoch": 0.10765,
+      "grad_norm": 1.4149964948090226,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 10765
+    },
+    {
+      "epoch": 0.10766,
+      "grad_norm": 0.9836393890872247,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 10766
+    },
+    {
+      "epoch": 0.10767,
+      "grad_norm": 1.2741151461584197,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 10767
+    },
+    {
+      "epoch": 0.10768,
+      "grad_norm": 1.1238950688453537,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 10768
+    },
+    {
+      "epoch": 0.10769,
+      "grad_norm": 1.0948648776611019,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 10769
+    },
+    {
+      "epoch": 0.1077,
+      "grad_norm": 1.201055603123503,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 10770
+    },
+    {
+      "epoch": 0.10771,
+      "grad_norm": 1.0193171022277756,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 10771
+    },
+    {
+      "epoch": 0.10772,
+      "grad_norm": 1.3771536426901734,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 10772
+    },
+    {
+      "epoch": 0.10773,
+      "grad_norm": 1.036422327432776,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 10773
+    },
+    {
+      "epoch": 0.10774,
+      "grad_norm": 1.3787602954979676,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 10774
+    },
+    {
+      "epoch": 0.10775,
+      "grad_norm": 0.9876267992932676,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 10775
+    },
+    {
+      "epoch": 0.10776,
+      "grad_norm": 1.2287074239953188,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 10776
+    },
+    {
+      "epoch": 0.10777,
+      "grad_norm": 1.1159465296748252,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 10777
+    },
+    {
+      "epoch": 0.10778,
+      "grad_norm": 1.2143634624449668,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 10778
+    },
+    {
+      "epoch": 0.10779,
+      "grad_norm": 0.8371084824202994,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 10779
+    },
+    {
+      "epoch": 0.1078,
+      "grad_norm": 1.0448266717192551,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 10780
+    },
+    {
+      "epoch": 0.10781,
+      "grad_norm": 1.339724186849472,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 10781
+    },
+    {
+      "epoch": 0.10782,
+      "grad_norm": 0.9542629472880868,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 10782
+    },
+    {
+      "epoch": 0.10783,
+      "grad_norm": 1.1863204682968913,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 10783
+    },
+    {
+      "epoch": 0.10784,
+      "grad_norm": 1.4462798896316145,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 10784
+    },
+    {
+      "epoch": 0.10785,
+      "grad_norm": 1.0951546615964138,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 10785
+    },
+    {
+      "epoch": 0.10786,
+      "grad_norm": 1.296430995678855,
+      "learning_rate": 0.003,
+      "loss": 4.0492,
+      "step": 10786
+    },
+    {
+      "epoch": 0.10787,
+      "grad_norm": 0.8997746711593995,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 10787
+    },
+    {
+      "epoch": 0.10788,
+      "grad_norm": 1.170708369493236,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 10788
+    },
+    {
+      "epoch": 0.10789,
+      "grad_norm": 1.1961220945918547,
+      "learning_rate": 0.003,
+      "loss": 3.981,
+      "step": 10789
+    },
+    {
+      "epoch": 0.1079,
+      "grad_norm": 1.2476033414148846,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 10790
+    },
+    {
+      "epoch": 0.10791,
+      "grad_norm": 1.3938502880572137,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 10791
+    },
+    {
+      "epoch": 0.10792,
+      "grad_norm": 0.8287063955222098,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 10792
+    },
+    {
+      "epoch": 0.10793,
+      "grad_norm": 0.9968889789622127,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 10793
+    },
+    {
+      "epoch": 0.10794,
+      "grad_norm": 1.2636523367148476,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 10794
+    },
+    {
+      "epoch": 0.10795,
+      "grad_norm": 0.9320188951631827,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 10795
+    },
+    {
+      "epoch": 0.10796,
+      "grad_norm": 1.13871483259714,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 10796
+    },
+    {
+      "epoch": 0.10797,
+      "grad_norm": 1.1430569400531418,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 10797
+    },
+    {
+      "epoch": 0.10798,
+      "grad_norm": 1.2223707770106316,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 10798
+    },
+    {
+      "epoch": 0.10799,
+      "grad_norm": 1.0797391466920707,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 10799
+    },
+    {
+      "epoch": 0.108,
+      "grad_norm": 1.5331772300169273,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 10800
+    },
+    {
+      "epoch": 0.10801,
+      "grad_norm": 0.8462632416981025,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 10801
+    },
+    {
+      "epoch": 0.10802,
+      "grad_norm": 1.1323280804246525,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 10802
+    },
+    {
+      "epoch": 0.10803,
+      "grad_norm": 1.2472360350753866,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 10803
+    },
+    {
+      "epoch": 0.10804,
+      "grad_norm": 1.0221991986540284,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 10804
+    },
+    {
+      "epoch": 0.10805,
+      "grad_norm": 1.193633649724736,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 10805
+    },
+    {
+      "epoch": 0.10806,
+      "grad_norm": 1.128446301055873,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 10806
+    },
+    {
+      "epoch": 0.10807,
+      "grad_norm": 1.3429005701975214,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 10807
+    },
+    {
+      "epoch": 0.10808,
+      "grad_norm": 1.1916229558274094,
+      "learning_rate": 0.003,
+      "loss": 4.1095,
+      "step": 10808
+    },
+    {
+      "epoch": 0.10809,
+      "grad_norm": 1.1666311030390335,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 10809
+    },
+    {
+      "epoch": 0.1081,
+      "grad_norm": 1.0948133461800402,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 10810
+    },
+    {
+      "epoch": 0.10811,
+      "grad_norm": 1.070956586984476,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 10811
+    },
+    {
+      "epoch": 0.10812,
+      "grad_norm": 1.1405962531093388,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 10812
+    },
+    {
+      "epoch": 0.10813,
+      "grad_norm": 1.1923710781482288,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 10813
+    },
+    {
+      "epoch": 0.10814,
+      "grad_norm": 0.9637789414068072,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 10814
+    },
+    {
+      "epoch": 0.10815,
+      "grad_norm": 1.1303684936509142,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 10815
+    },
+    {
+      "epoch": 0.10816,
+      "grad_norm": 1.1277334351478485,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 10816
+    },
+    {
+      "epoch": 0.10817,
+      "grad_norm": 1.394920688077164,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 10817
+    },
+    {
+      "epoch": 0.10818,
+      "grad_norm": 0.9247339438137491,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 10818
+    },
+    {
+      "epoch": 0.10819,
+      "grad_norm": 1.2827723765119825,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 10819
+    },
+    {
+      "epoch": 0.1082,
+      "grad_norm": 1.1027026712478345,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 10820
+    },
+    {
+      "epoch": 0.10821,
+      "grad_norm": 1.130402720324394,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 10821
+    },
+    {
+      "epoch": 0.10822,
+      "grad_norm": 1.1822569814525696,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 10822
+    },
+    {
+      "epoch": 0.10823,
+      "grad_norm": 1.3160765140161788,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 10823
+    },
+    {
+      "epoch": 0.10824,
+      "grad_norm": 1.2344419298288998,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 10824
+    },
+    {
+      "epoch": 0.10825,
+      "grad_norm": 0.9804550860424565,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 10825
+    },
+    {
+      "epoch": 0.10826,
+      "grad_norm": 1.2393166369862887,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 10826
+    },
+    {
+      "epoch": 0.10827,
+      "grad_norm": 1.1285558425268936,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 10827
+    },
+    {
+      "epoch": 0.10828,
+      "grad_norm": 1.2190487151901024,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 10828
+    },
+    {
+      "epoch": 0.10829,
+      "grad_norm": 1.0051177849528747,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 10829
+    },
+    {
+      "epoch": 0.1083,
+      "grad_norm": 1.2475552966341636,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 10830
+    },
+    {
+      "epoch": 0.10831,
+      "grad_norm": 1.086747469310628,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 10831
+    },
+    {
+      "epoch": 0.10832,
+      "grad_norm": 1.2521787369584434,
+      "learning_rate": 0.003,
+      "loss": 4.0529,
+      "step": 10832
+    },
+    {
+      "epoch": 0.10833,
+      "grad_norm": 1.1044210264497303,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 10833
+    },
+    {
+      "epoch": 0.10834,
+      "grad_norm": 1.121459039289483,
+      "learning_rate": 0.003,
+      "loss": 3.988,
+      "step": 10834
+    },
+    {
+      "epoch": 0.10835,
+      "grad_norm": 1.1128708174680606,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 10835
+    },
+    {
+      "epoch": 0.10836,
+      "grad_norm": 1.5980637509276823,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 10836
+    },
+    {
+      "epoch": 0.10837,
+      "grad_norm": 1.2324712817974364,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 10837
+    },
+    {
+      "epoch": 0.10838,
+      "grad_norm": 1.0657770844010468,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 10838
+    },
+    {
+      "epoch": 0.10839,
+      "grad_norm": 1.0927040171769402,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 10839
+    },
+    {
+      "epoch": 0.1084,
+      "grad_norm": 1.1359360509882592,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 10840
+    },
+    {
+      "epoch": 0.10841,
+      "grad_norm": 0.903405768475979,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 10841
+    },
+    {
+      "epoch": 0.10842,
+      "grad_norm": 1.227696001765903,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 10842
+    },
+    {
+      "epoch": 0.10843,
+      "grad_norm": 1.2265984387203541,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 10843
+    },
+    {
+      "epoch": 0.10844,
+      "grad_norm": 1.2973674659059795,
+      "learning_rate": 0.003,
+      "loss": 4.0612,
+      "step": 10844
+    },
+    {
+      "epoch": 0.10845,
+      "grad_norm": 1.0539275266266204,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 10845
+    },
+    {
+      "epoch": 0.10846,
+      "grad_norm": 1.2771787744436187,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 10846
+    },
+    {
+      "epoch": 0.10847,
+      "grad_norm": 1.1019062809673246,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 10847
+    },
+    {
+      "epoch": 0.10848,
+      "grad_norm": 1.170952800012063,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 10848
+    },
+    {
+      "epoch": 0.10849,
+      "grad_norm": 1.199786332475287,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 10849
+    },
+    {
+      "epoch": 0.1085,
+      "grad_norm": 1.0400770912095054,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 10850
+    },
+    {
+      "epoch": 0.10851,
+      "grad_norm": 1.169933869139355,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 10851
+    },
+    {
+      "epoch": 0.10852,
+      "grad_norm": 1.134282690971657,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 10852
+    },
+    {
+      "epoch": 0.10853,
+      "grad_norm": 1.1214683357708541,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 10853
+    },
+    {
+      "epoch": 0.10854,
+      "grad_norm": 1.1718280449767655,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 10854
+    },
+    {
+      "epoch": 0.10855,
+      "grad_norm": 1.3835381568865763,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 10855
+    },
+    {
+      "epoch": 0.10856,
+      "grad_norm": 0.8314414595792815,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 10856
+    },
+    {
+      "epoch": 0.10857,
+      "grad_norm": 0.9164952496161265,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 10857
+    },
+    {
+      "epoch": 0.10858,
+      "grad_norm": 1.1610451470170327,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 10858
+    },
+    {
+      "epoch": 0.10859,
+      "grad_norm": 1.1965855517444213,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 10859
+    },
+    {
+      "epoch": 0.1086,
+      "grad_norm": 1.128342345854473,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 10860
+    },
+    {
+      "epoch": 0.10861,
+      "grad_norm": 1.2002004859159896,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 10861
+    },
+    {
+      "epoch": 0.10862,
+      "grad_norm": 1.2403320918171405,
+      "learning_rate": 0.003,
+      "loss": 4.0751,
+      "step": 10862
+    },
+    {
+      "epoch": 0.10863,
+      "grad_norm": 0.9972196835269065,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 10863
+    },
+    {
+      "epoch": 0.10864,
+      "grad_norm": 1.387585591578004,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 10864
+    },
+    {
+      "epoch": 0.10865,
+      "grad_norm": 0.9421221076213664,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 10865
+    },
+    {
+      "epoch": 0.10866,
+      "grad_norm": 1.1948737529367885,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 10866
+    },
+    {
+      "epoch": 0.10867,
+      "grad_norm": 1.022212784674362,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 10867
+    },
+    {
+      "epoch": 0.10868,
+      "grad_norm": 1.2732584760819687,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 10868
+    },
+    {
+      "epoch": 0.10869,
+      "grad_norm": 1.1047154277724311,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 10869
+    },
+    {
+      "epoch": 0.1087,
+      "grad_norm": 1.1854424862823498,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 10870
+    },
+    {
+      "epoch": 0.10871,
+      "grad_norm": 1.1741520672533723,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 10871
+    },
+    {
+      "epoch": 0.10872,
+      "grad_norm": 1.0480602065379063,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 10872
+    },
+    {
+      "epoch": 0.10873,
+      "grad_norm": 1.2571567165899944,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 10873
+    },
+    {
+      "epoch": 0.10874,
+      "grad_norm": 1.337309331111073,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 10874
+    },
+    {
+      "epoch": 0.10875,
+      "grad_norm": 1.2565095414668002,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 10875
+    },
+    {
+      "epoch": 0.10876,
+      "grad_norm": 0.9744590907381745,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 10876
+    },
+    {
+      "epoch": 0.10877,
+      "grad_norm": 1.3835777093829644,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 10877
+    },
+    {
+      "epoch": 0.10878,
+      "grad_norm": 0.9441806514467737,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 10878
+    },
+    {
+      "epoch": 0.10879,
+      "grad_norm": 1.1451762360336781,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 10879
+    },
+    {
+      "epoch": 0.1088,
+      "grad_norm": 1.1541770622613978,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 10880
+    },
+    {
+      "epoch": 0.10881,
+      "grad_norm": 1.1458016315587218,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 10881
+    },
+    {
+      "epoch": 0.10882,
+      "grad_norm": 1.070149396095645,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 10882
+    },
+    {
+      "epoch": 0.10883,
+      "grad_norm": 1.177388796693508,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 10883
+    },
+    {
+      "epoch": 0.10884,
+      "grad_norm": 1.097237368443832,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 10884
+    },
+    {
+      "epoch": 0.10885,
+      "grad_norm": 1.200352068902575,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 10885
+    },
+    {
+      "epoch": 0.10886,
+      "grad_norm": 1.104974642016532,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 10886
+    },
+    {
+      "epoch": 0.10887,
+      "grad_norm": 1.2279008727498621,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 10887
+    },
+    {
+      "epoch": 0.10888,
+      "grad_norm": 1.202420390338381,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 10888
+    },
+    {
+      "epoch": 0.10889,
+      "grad_norm": 1.0304295601558235,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 10889
+    },
+    {
+      "epoch": 0.1089,
+      "grad_norm": 1.3568156975479342,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 10890
+    },
+    {
+      "epoch": 0.10891,
+      "grad_norm": 1.106559013336969,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 10891
+    },
+    {
+      "epoch": 0.10892,
+      "grad_norm": 1.175561573120817,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 10892
+    },
+    {
+      "epoch": 0.10893,
+      "grad_norm": 1.02974471285148,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 10893
+    },
+    {
+      "epoch": 0.10894,
+      "grad_norm": 1.186667566372747,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 10894
+    },
+    {
+      "epoch": 0.10895,
+      "grad_norm": 0.9317722895107462,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 10895
+    },
+    {
+      "epoch": 0.10896,
+      "grad_norm": 1.1702513574970357,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 10896
+    },
+    {
+      "epoch": 0.10897,
+      "grad_norm": 1.2355323699421268,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 10897
+    },
+    {
+      "epoch": 0.10898,
+      "grad_norm": 1.0319769089185715,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 10898
+    },
+    {
+      "epoch": 0.10899,
+      "grad_norm": 1.1782040466486867,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 10899
+    },
+    {
+      "epoch": 0.109,
+      "grad_norm": 1.0467952536854728,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 10900
+    },
+    {
+      "epoch": 0.10901,
+      "grad_norm": 1.230063746087194,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 10901
+    },
+    {
+      "epoch": 0.10902,
+      "grad_norm": 1.256377588857215,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 10902
+    },
+    {
+      "epoch": 0.10903,
+      "grad_norm": 1.1730572753174304,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 10903
+    },
+    {
+      "epoch": 0.10904,
+      "grad_norm": 1.1307723050871854,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 10904
+    },
+    {
+      "epoch": 0.10905,
+      "grad_norm": 1.1510216931005048,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 10905
+    },
+    {
+      "epoch": 0.10906,
+      "grad_norm": 1.0156123289857955,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 10906
+    },
+    {
+      "epoch": 0.10907,
+      "grad_norm": 1.298179684529916,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 10907
+    },
+    {
+      "epoch": 0.10908,
+      "grad_norm": 1.017892580324516,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 10908
+    },
+    {
+      "epoch": 0.10909,
+      "grad_norm": 1.4152613686138085,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 10909
+    },
+    {
+      "epoch": 0.1091,
+      "grad_norm": 0.9104381971108158,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 10910
+    },
+    {
+      "epoch": 0.10911,
+      "grad_norm": 1.1706988285504234,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 10911
+    },
+    {
+      "epoch": 0.10912,
+      "grad_norm": 1.1246788359272881,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 10912
+    },
+    {
+      "epoch": 0.10913,
+      "grad_norm": 1.2615350196980175,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 10913
+    },
+    {
+      "epoch": 0.10914,
+      "grad_norm": 1.0529171886939046,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 10914
+    },
+    {
+      "epoch": 0.10915,
+      "grad_norm": 1.181984560865992,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 10915
+    },
+    {
+      "epoch": 0.10916,
+      "grad_norm": 0.9398122380586809,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 10916
+    },
+    {
+      "epoch": 0.10917,
+      "grad_norm": 1.3986893076006122,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 10917
+    },
+    {
+      "epoch": 0.10918,
+      "grad_norm": 1.1386917109617494,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 10918
+    },
+    {
+      "epoch": 0.10919,
+      "grad_norm": 1.388526998048085,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 10919
+    },
+    {
+      "epoch": 0.1092,
+      "grad_norm": 0.9553367529462157,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 10920
+    },
+    {
+      "epoch": 0.10921,
+      "grad_norm": 1.1249097029410848,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 10921
+    },
+    {
+      "epoch": 0.10922,
+      "grad_norm": 1.1588772915067118,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 10922
+    },
+    {
+      "epoch": 0.10923,
+      "grad_norm": 1.1594370172230615,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 10923
+    },
+    {
+      "epoch": 0.10924,
+      "grad_norm": 1.1587584233463495,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 10924
+    },
+    {
+      "epoch": 0.10925,
+      "grad_norm": 1.199402284930254,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 10925
+    },
+    {
+      "epoch": 0.10926,
+      "grad_norm": 1.3780799925530476,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 10926
+    },
+    {
+      "epoch": 0.10927,
+      "grad_norm": 0.9547337146677488,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 10927
+    },
+    {
+      "epoch": 0.10928,
+      "grad_norm": 1.2312732084033924,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 10928
+    },
+    {
+      "epoch": 0.10929,
+      "grad_norm": 1.124638240875568,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 10929
+    },
+    {
+      "epoch": 0.1093,
+      "grad_norm": 1.0996981207669076,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 10930
+    },
+    {
+      "epoch": 0.10931,
+      "grad_norm": 0.9997241567513053,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 10931
+    },
+    {
+      "epoch": 0.10932,
+      "grad_norm": 1.2572811468968264,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 10932
+    },
+    {
+      "epoch": 0.10933,
+      "grad_norm": 1.235322426229629,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 10933
+    },
+    {
+      "epoch": 0.10934,
+      "grad_norm": 1.203499600140372,
+      "learning_rate": 0.003,
+      "loss": 4.0677,
+      "step": 10934
+    },
+    {
+      "epoch": 0.10935,
+      "grad_norm": 1.0560801016003458,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 10935
+    },
+    {
+      "epoch": 0.10936,
+      "grad_norm": 1.4290113078300473,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 10936
+    },
+    {
+      "epoch": 0.10937,
+      "grad_norm": 0.8838300191155496,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 10937
+    },
+    {
+      "epoch": 0.10938,
+      "grad_norm": 1.0970734661026584,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 10938
+    },
+    {
+      "epoch": 0.10939,
+      "grad_norm": 1.1495776401685247,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 10939
+    },
+    {
+      "epoch": 0.1094,
+      "grad_norm": 1.2065630154932683,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 10940
+    },
+    {
+      "epoch": 0.10941,
+      "grad_norm": 1.2374225552061804,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 10941
+    },
+    {
+      "epoch": 0.10942,
+      "grad_norm": 1.2524313252271473,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 10942
+    },
+    {
+      "epoch": 0.10943,
+      "grad_norm": 1.0258331148557027,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 10943
+    },
+    {
+      "epoch": 0.10944,
+      "grad_norm": 1.1367149424101854,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 10944
+    },
+    {
+      "epoch": 0.10945,
+      "grad_norm": 1.264314678398142,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 10945
+    },
+    {
+      "epoch": 0.10946,
+      "grad_norm": 1.1470856240128633,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 10946
+    },
+    {
+      "epoch": 0.10947,
+      "grad_norm": 1.343562409477934,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 10947
+    },
+    {
+      "epoch": 0.10948,
+      "grad_norm": 1.1340548072968755,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 10948
+    },
+    {
+      "epoch": 0.10949,
+      "grad_norm": 1.1527721030646023,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 10949
+    },
+    {
+      "epoch": 0.1095,
+      "grad_norm": 1.0245060674231437,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 10950
+    },
+    {
+      "epoch": 0.10951,
+      "grad_norm": 1.094903481253029,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 10951
+    },
+    {
+      "epoch": 0.10952,
+      "grad_norm": 1.138549421081459,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 10952
+    },
+    {
+      "epoch": 0.10953,
+      "grad_norm": 1.1286975653189033,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 10953
+    },
+    {
+      "epoch": 0.10954,
+      "grad_norm": 1.1602190952130804,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 10954
+    },
+    {
+      "epoch": 0.10955,
+      "grad_norm": 1.2130192000896474,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 10955
+    },
+    {
+      "epoch": 0.10956,
+      "grad_norm": 1.2258014219563267,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 10956
+    },
+    {
+      "epoch": 0.10957,
+      "grad_norm": 1.0776761162714221,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 10957
+    },
+    {
+      "epoch": 0.10958,
+      "grad_norm": 1.3034903704468932,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 10958
+    },
+    {
+      "epoch": 0.10959,
+      "grad_norm": 1.0464894790504649,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 10959
+    },
+    {
+      "epoch": 0.1096,
+      "grad_norm": 1.14488623119123,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 10960
+    },
+    {
+      "epoch": 0.10961,
+      "grad_norm": 1.115487082003804,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 10961
+    },
+    {
+      "epoch": 0.10962,
+      "grad_norm": 1.2309886332824755,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 10962
+    },
+    {
+      "epoch": 0.10963,
+      "grad_norm": 1.1406460952529542,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 10963
+    },
+    {
+      "epoch": 0.10964,
+      "grad_norm": 1.2092082481970878,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 10964
+    },
+    {
+      "epoch": 0.10965,
+      "grad_norm": 1.1089678629170563,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 10965
+    },
+    {
+      "epoch": 0.10966,
+      "grad_norm": 1.453982552663511,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 10966
+    },
+    {
+      "epoch": 0.10967,
+      "grad_norm": 1.0132525838939015,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 10967
+    },
+    {
+      "epoch": 0.10968,
+      "grad_norm": 1.2921121470526886,
+      "learning_rate": 0.003,
+      "loss": 4.0568,
+      "step": 10968
+    },
+    {
+      "epoch": 0.10969,
+      "grad_norm": 1.0273381027526496,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 10969
+    },
+    {
+      "epoch": 0.1097,
+      "grad_norm": 1.402973935592333,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 10970
+    },
+    {
+      "epoch": 0.10971,
+      "grad_norm": 1.1233713113959605,
+      "learning_rate": 0.003,
+      "loss": 4.0514,
+      "step": 10971
+    },
+    {
+      "epoch": 0.10972,
+      "grad_norm": 1.1972507884324581,
+      "learning_rate": 0.003,
+      "loss": 4.0761,
+      "step": 10972
+    },
+    {
+      "epoch": 0.10973,
+      "grad_norm": 1.0689380384676346,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 10973
+    },
+    {
+      "epoch": 0.10974,
+      "grad_norm": 1.2935838341305652,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 10974
+    },
+    {
+      "epoch": 0.10975,
+      "grad_norm": 1.2148835363852442,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 10975
+    },
+    {
+      "epoch": 0.10976,
+      "grad_norm": 1.1196265735397568,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 10976
+    },
+    {
+      "epoch": 0.10977,
+      "grad_norm": 1.0766132416152376,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 10977
+    },
+    {
+      "epoch": 0.10978,
+      "grad_norm": 1.262804962527681,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 10978
+    },
+    {
+      "epoch": 0.10979,
+      "grad_norm": 0.9073294804824369,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 10979
+    },
+    {
+      "epoch": 0.1098,
+      "grad_norm": 1.2605259139634963,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 10980
+    },
+    {
+      "epoch": 0.10981,
+      "grad_norm": 0.9680594140582308,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 10981
+    },
+    {
+      "epoch": 0.10982,
+      "grad_norm": 1.164726097839411,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 10982
+    },
+    {
+      "epoch": 0.10983,
+      "grad_norm": 1.078100686604328,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 10983
+    },
+    {
+      "epoch": 0.10984,
+      "grad_norm": 1.27951824157926,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 10984
+    },
+    {
+      "epoch": 0.10985,
+      "grad_norm": 1.2483088905929303,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 10985
+    },
+    {
+      "epoch": 0.10986,
+      "grad_norm": 1.0764913660255253,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 10986
+    },
+    {
+      "epoch": 0.10987,
+      "grad_norm": 1.2325305120406893,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 10987
+    },
+    {
+      "epoch": 0.10988,
+      "grad_norm": 1.1074761108120266,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 10988
+    },
+    {
+      "epoch": 0.10989,
+      "grad_norm": 1.1438099942455424,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 10989
+    },
+    {
+      "epoch": 0.1099,
+      "grad_norm": 0.9531903580271849,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 10990
+    },
+    {
+      "epoch": 0.10991,
+      "grad_norm": 1.1261491344660406,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 10991
+    },
+    {
+      "epoch": 0.10992,
+      "grad_norm": 1.2458886185212843,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 10992
+    },
+    {
+      "epoch": 0.10993,
+      "grad_norm": 0.9909464766748146,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 10993
+    },
+    {
+      "epoch": 0.10994,
+      "grad_norm": 1.2594072221547077,
+      "learning_rate": 0.003,
+      "loss": 4.0682,
+      "step": 10994
+    },
+    {
+      "epoch": 0.10995,
+      "grad_norm": 1.0286576604311943,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 10995
+    },
+    {
+      "epoch": 0.10996,
+      "grad_norm": 1.0879885186981424,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 10996
+    },
+    {
+      "epoch": 0.10997,
+      "grad_norm": 1.2251558646601735,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 10997
+    },
+    {
+      "epoch": 0.10998,
+      "grad_norm": 1.520992898306493,
+      "learning_rate": 0.003,
+      "loss": 4.0723,
+      "step": 10998
+    },
+    {
+      "epoch": 0.10999,
+      "grad_norm": 0.9974930439447344,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 10999
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 1.3984769150579184,
+      "learning_rate": 0.003,
+      "loss": 4.074,
+      "step": 11000
+    },
+    {
+      "epoch": 0.11001,
+      "grad_norm": 0.9811644529227863,
+      "learning_rate": 0.003,
+      "loss": 4.0576,
+      "step": 11001
+    },
+    {
+      "epoch": 0.11002,
+      "grad_norm": 1.1450918449781147,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 11002
+    },
+    {
+      "epoch": 0.11003,
+      "grad_norm": 1.0480431049370729,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 11003
+    },
+    {
+      "epoch": 0.11004,
+      "grad_norm": 1.4695224754258749,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 11004
+    },
+    {
+      "epoch": 0.11005,
+      "grad_norm": 1.0012488506178907,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 11005
+    },
+    {
+      "epoch": 0.11006,
+      "grad_norm": 1.1097531270381813,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 11006
+    },
+    {
+      "epoch": 0.11007,
+      "grad_norm": 1.3457776983843623,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 11007
+    },
+    {
+      "epoch": 0.11008,
+      "grad_norm": 0.9945908240179561,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 11008
+    },
+    {
+      "epoch": 0.11009,
+      "grad_norm": 1.146031306723099,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 11009
+    },
+    {
+      "epoch": 0.1101,
+      "grad_norm": 1.3041512694151578,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 11010
+    },
+    {
+      "epoch": 0.11011,
+      "grad_norm": 1.0561894869979496,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 11011
+    },
+    {
+      "epoch": 0.11012,
+      "grad_norm": 1.2575408616044952,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 11012
+    },
+    {
+      "epoch": 0.11013,
+      "grad_norm": 0.9634053404348774,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 11013
+    },
+    {
+      "epoch": 0.11014,
+      "grad_norm": 1.1455573340874126,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 11014
+    },
+    {
+      "epoch": 0.11015,
+      "grad_norm": 1.0979373194946567,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 11015
+    },
+    {
+      "epoch": 0.11016,
+      "grad_norm": 1.0909505512602309,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 11016
+    },
+    {
+      "epoch": 0.11017,
+      "grad_norm": 1.070802229909584,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 11017
+    },
+    {
+      "epoch": 0.11018,
+      "grad_norm": 1.0655397569106997,
+      "learning_rate": 0.003,
+      "loss": 4.0492,
+      "step": 11018
+    },
+    {
+      "epoch": 0.11019,
+      "grad_norm": 1.2632535966366136,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 11019
+    },
+    {
+      "epoch": 0.1102,
+      "grad_norm": 0.958588750896633,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 11020
+    },
+    {
+      "epoch": 0.11021,
+      "grad_norm": 1.1602559223950129,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 11021
+    },
+    {
+      "epoch": 0.11022,
+      "grad_norm": 0.9786123700993407,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 11022
+    },
+    {
+      "epoch": 0.11023,
+      "grad_norm": 1.0897353532712468,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 11023
+    },
+    {
+      "epoch": 0.11024,
+      "grad_norm": 1.3783770276098104,
+      "learning_rate": 0.003,
+      "loss": 4.0579,
+      "step": 11024
+    },
+    {
+      "epoch": 0.11025,
+      "grad_norm": 0.9920237745810896,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 11025
+    },
+    {
+      "epoch": 0.11026,
+      "grad_norm": 1.2990419941936762,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 11026
+    },
+    {
+      "epoch": 0.11027,
+      "grad_norm": 1.1054453581494064,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 11027
+    },
+    {
+      "epoch": 0.11028,
+      "grad_norm": 1.1552917683687645,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 11028
+    },
+    {
+      "epoch": 0.11029,
+      "grad_norm": 1.1868727922087963,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 11029
+    },
+    {
+      "epoch": 0.1103,
+      "grad_norm": 0.9825451090946785,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 11030
+    },
+    {
+      "epoch": 0.11031,
+      "grad_norm": 1.1804760582921996,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 11031
+    },
+    {
+      "epoch": 0.11032,
+      "grad_norm": 1.0624183712164799,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 11032
+    },
+    {
+      "epoch": 0.11033,
+      "grad_norm": 1.279437929756929,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 11033
+    },
+    {
+      "epoch": 0.11034,
+      "grad_norm": 0.9050557877634389,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 11034
+    },
+    {
+      "epoch": 0.11035,
+      "grad_norm": 1.0758467424852527,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 11035
+    },
+    {
+      "epoch": 0.11036,
+      "grad_norm": 1.4004957620496878,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 11036
+    },
+    {
+      "epoch": 0.11037,
+      "grad_norm": 1.148007136705928,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 11037
+    },
+    {
+      "epoch": 0.11038,
+      "grad_norm": 1.1327536508610503,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 11038
+    },
+    {
+      "epoch": 0.11039,
+      "grad_norm": 1.1848546858736662,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 11039
+    },
+    {
+      "epoch": 0.1104,
+      "grad_norm": 1.323480908182648,
+      "learning_rate": 0.003,
+      "loss": 4.0653,
+      "step": 11040
+    },
+    {
+      "epoch": 0.11041,
+      "grad_norm": 0.8971359841886503,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 11041
+    },
+    {
+      "epoch": 0.11042,
+      "grad_norm": 1.1769094334599115,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 11042
+    },
+    {
+      "epoch": 0.11043,
+      "grad_norm": 1.1666462489026705,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 11043
+    },
+    {
+      "epoch": 0.11044,
+      "grad_norm": 1.2253457217850752,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 11044
+    },
+    {
+      "epoch": 0.11045,
+      "grad_norm": 1.1206251489359325,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 11045
+    },
+    {
+      "epoch": 0.11046,
+      "grad_norm": 1.3392782992158903,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 11046
+    },
+    {
+      "epoch": 0.11047,
+      "grad_norm": 1.0113482407477978,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 11047
+    },
+    {
+      "epoch": 0.11048,
+      "grad_norm": 1.2434741216349872,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 11048
+    },
+    {
+      "epoch": 0.11049,
+      "grad_norm": 1.0690557967832832,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 11049
+    },
+    {
+      "epoch": 0.1105,
+      "grad_norm": 1.2906453831596687,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 11050
+    },
+    {
+      "epoch": 0.11051,
+      "grad_norm": 1.049645251796059,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 11051
+    },
+    {
+      "epoch": 0.11052,
+      "grad_norm": 1.1022370813626103,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 11052
+    },
+    {
+      "epoch": 0.11053,
+      "grad_norm": 1.404602485307846,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 11053
+    },
+    {
+      "epoch": 0.11054,
+      "grad_norm": 0.9778708210072532,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 11054
+    },
+    {
+      "epoch": 0.11055,
+      "grad_norm": 1.0849146668394676,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 11055
+    },
+    {
+      "epoch": 0.11056,
+      "grad_norm": 1.3525665780096308,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 11056
+    },
+    {
+      "epoch": 0.11057,
+      "grad_norm": 0.8663546618979914,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 11057
+    },
+    {
+      "epoch": 0.11058,
+      "grad_norm": 0.9349712450004345,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 11058
+    },
+    {
+      "epoch": 0.11059,
+      "grad_norm": 1.2419152885907019,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 11059
+    },
+    {
+      "epoch": 0.1106,
+      "grad_norm": 1.2015131996587158,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 11060
+    },
+    {
+      "epoch": 0.11061,
+      "grad_norm": 1.1726168292055932,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 11061
+    },
+    {
+      "epoch": 0.11062,
+      "grad_norm": 1.235236596847503,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 11062
+    },
+    {
+      "epoch": 0.11063,
+      "grad_norm": 1.1430465957610831,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 11063
+    },
+    {
+      "epoch": 0.11064,
+      "grad_norm": 1.1694916513185551,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 11064
+    },
+    {
+      "epoch": 0.11065,
+      "grad_norm": 1.1843858564427805,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 11065
+    },
+    {
+      "epoch": 0.11066,
+      "grad_norm": 1.1731005960048595,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 11066
+    },
+    {
+      "epoch": 0.11067,
+      "grad_norm": 1.3610597815554937,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 11067
+    },
+    {
+      "epoch": 0.11068,
+      "grad_norm": 0.9388571621226179,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 11068
+    },
+    {
+      "epoch": 0.11069,
+      "grad_norm": 1.1845720962926707,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 11069
+    },
+    {
+      "epoch": 0.1107,
+      "grad_norm": 1.0184914945024592,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 11070
+    },
+    {
+      "epoch": 0.11071,
+      "grad_norm": 1.1695843280280722,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 11071
+    },
+    {
+      "epoch": 0.11072,
+      "grad_norm": 1.2124753448260535,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 11072
+    },
+    {
+      "epoch": 0.11073,
+      "grad_norm": 1.1600313038561658,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 11073
+    },
+    {
+      "epoch": 0.11074,
+      "grad_norm": 1.2553421574935741,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 11074
+    },
+    {
+      "epoch": 0.11075,
+      "grad_norm": 1.1166830328732873,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 11075
+    },
+    {
+      "epoch": 0.11076,
+      "grad_norm": 1.362951644650086,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 11076
+    },
+    {
+      "epoch": 0.11077,
+      "grad_norm": 1.0206627313768633,
+      "learning_rate": 0.003,
+      "loss": 4.0744,
+      "step": 11077
+    },
+    {
+      "epoch": 0.11078,
+      "grad_norm": 1.137921297110709,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 11078
+    },
+    {
+      "epoch": 0.11079,
+      "grad_norm": 1.0636974748852701,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 11079
+    },
+    {
+      "epoch": 0.1108,
+      "grad_norm": 1.243205584369264,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 11080
+    },
+    {
+      "epoch": 0.11081,
+      "grad_norm": 1.1466850373196094,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 11081
+    },
+    {
+      "epoch": 0.11082,
+      "grad_norm": 1.0332780369799524,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 11082
+    },
+    {
+      "epoch": 0.11083,
+      "grad_norm": 1.1547474519658834,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 11083
+    },
+    {
+      "epoch": 0.11084,
+      "grad_norm": 1.2630821564699577,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 11084
+    },
+    {
+      "epoch": 0.11085,
+      "grad_norm": 0.9616550782921469,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 11085
+    },
+    {
+      "epoch": 0.11086,
+      "grad_norm": 1.0337612711666608,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 11086
+    },
+    {
+      "epoch": 0.11087,
+      "grad_norm": 1.246325517565779,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 11087
+    },
+    {
+      "epoch": 0.11088,
+      "grad_norm": 1.1210534817234898,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 11088
+    },
+    {
+      "epoch": 0.11089,
+      "grad_norm": 1.1210922280085747,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 11089
+    },
+    {
+      "epoch": 0.1109,
+      "grad_norm": 1.2076356656569776,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 11090
+    },
+    {
+      "epoch": 0.11091,
+      "grad_norm": 1.0505164908267228,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 11091
+    },
+    {
+      "epoch": 0.11092,
+      "grad_norm": 1.116407928167723,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 11092
+    },
+    {
+      "epoch": 0.11093,
+      "grad_norm": 1.3679759693794233,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 11093
+    },
+    {
+      "epoch": 0.11094,
+      "grad_norm": 0.9149580813928061,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 11094
+    },
+    {
+      "epoch": 0.11095,
+      "grad_norm": 1.2127377272290825,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 11095
+    },
+    {
+      "epoch": 0.11096,
+      "grad_norm": 1.2654770177418224,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 11096
+    },
+    {
+      "epoch": 0.11097,
+      "grad_norm": 1.005101859952365,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 11097
+    },
+    {
+      "epoch": 0.11098,
+      "grad_norm": 1.1559183971161302,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 11098
+    },
+    {
+      "epoch": 0.11099,
+      "grad_norm": 1.2125995577106417,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 11099
+    },
+    {
+      "epoch": 0.111,
+      "grad_norm": 1.1741835677043042,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 11100
+    },
+    {
+      "epoch": 0.11101,
+      "grad_norm": 1.0431649381898074,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 11101
+    },
+    {
+      "epoch": 0.11102,
+      "grad_norm": 1.1127524956437145,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 11102
+    },
+    {
+      "epoch": 0.11103,
+      "grad_norm": 1.4390556394886567,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 11103
+    },
+    {
+      "epoch": 0.11104,
+      "grad_norm": 0.8695810012178421,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 11104
+    },
+    {
+      "epoch": 0.11105,
+      "grad_norm": 1.2898692002393344,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 11105
+    },
+    {
+      "epoch": 0.11106,
+      "grad_norm": 0.9339284088375288,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 11106
+    },
+    {
+      "epoch": 0.11107,
+      "grad_norm": 1.002203567794351,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 11107
+    },
+    {
+      "epoch": 0.11108,
+      "grad_norm": 1.212964255808357,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 11108
+    },
+    {
+      "epoch": 0.11109,
+      "grad_norm": 1.1363148006340882,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 11109
+    },
+    {
+      "epoch": 0.1111,
+      "grad_norm": 1.0654823793341264,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 11110
+    },
+    {
+      "epoch": 0.11111,
+      "grad_norm": 1.0410737532990986,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 11111
+    },
+    {
+      "epoch": 0.11112,
+      "grad_norm": 1.2755289571055364,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 11112
+    },
+    {
+      "epoch": 0.11113,
+      "grad_norm": 1.1301440513445171,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 11113
+    },
+    {
+      "epoch": 0.11114,
+      "grad_norm": 1.3210030487586704,
+      "learning_rate": 0.003,
+      "loss": 4.0686,
+      "step": 11114
+    },
+    {
+      "epoch": 0.11115,
+      "grad_norm": 1.0405738153003652,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 11115
+    },
+    {
+      "epoch": 0.11116,
+      "grad_norm": 1.4674668716202037,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 11116
+    },
+    {
+      "epoch": 0.11117,
+      "grad_norm": 1.126768367155695,
+      "learning_rate": 0.003,
+      "loss": 4.0556,
+      "step": 11117
+    },
+    {
+      "epoch": 0.11118,
+      "grad_norm": 1.4499818568572693,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 11118
+    },
+    {
+      "epoch": 0.11119,
+      "grad_norm": 0.8723773760283889,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 11119
+    },
+    {
+      "epoch": 0.1112,
+      "grad_norm": 0.9880766918243952,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 11120
+    },
+    {
+      "epoch": 0.11121,
+      "grad_norm": 1.233724451927589,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 11121
+    },
+    {
+      "epoch": 0.11122,
+      "grad_norm": 0.9592358462304813,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 11122
+    },
+    {
+      "epoch": 0.11123,
+      "grad_norm": 0.9374151178986565,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 11123
+    },
+    {
+      "epoch": 0.11124,
+      "grad_norm": 1.0746579089776649,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 11124
+    },
+    {
+      "epoch": 0.11125,
+      "grad_norm": 1.0272355456391729,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 11125
+    },
+    {
+      "epoch": 0.11126,
+      "grad_norm": 1.4696931867563632,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 11126
+    },
+    {
+      "epoch": 0.11127,
+      "grad_norm": 0.9306122996930648,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 11127
+    },
+    {
+      "epoch": 0.11128,
+      "grad_norm": 1.259426522382338,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 11128
+    },
+    {
+      "epoch": 0.11129,
+      "grad_norm": 0.9952764357013387,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 11129
+    },
+    {
+      "epoch": 0.1113,
+      "grad_norm": 1.2922884263779666,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 11130
+    },
+    {
+      "epoch": 0.11131,
+      "grad_norm": 1.0933870097789917,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 11131
+    },
+    {
+      "epoch": 0.11132,
+      "grad_norm": 1.222132471274821,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 11132
+    },
+    {
+      "epoch": 0.11133,
+      "grad_norm": 1.086913343875556,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 11133
+    },
+    {
+      "epoch": 0.11134,
+      "grad_norm": 1.2038589551456618,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 11134
+    },
+    {
+      "epoch": 0.11135,
+      "grad_norm": 1.1630263270645305,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 11135
+    },
+    {
+      "epoch": 0.11136,
+      "grad_norm": 1.2109273926288262,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 11136
+    },
+    {
+      "epoch": 0.11137,
+      "grad_norm": 1.133767888763714,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 11137
+    },
+    {
+      "epoch": 0.11138,
+      "grad_norm": 1.3304986578383604,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 11138
+    },
+    {
+      "epoch": 0.11139,
+      "grad_norm": 1.0703377791647029,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 11139
+    },
+    {
+      "epoch": 0.1114,
+      "grad_norm": 1.2710614152570363,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 11140
+    },
+    {
+      "epoch": 0.11141,
+      "grad_norm": 1.23803074569592,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 11141
+    },
+    {
+      "epoch": 0.11142,
+      "grad_norm": 1.3481665548815753,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 11142
+    },
+    {
+      "epoch": 0.11143,
+      "grad_norm": 1.07936134160658,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 11143
+    },
+    {
+      "epoch": 0.11144,
+      "grad_norm": 1.2330660101095827,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 11144
+    },
+    {
+      "epoch": 0.11145,
+      "grad_norm": 1.1001097752215603,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 11145
+    },
+    {
+      "epoch": 0.11146,
+      "grad_norm": 1.2439381573445438,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 11146
+    },
+    {
+      "epoch": 0.11147,
+      "grad_norm": 0.99219418207008,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 11147
+    },
+    {
+      "epoch": 0.11148,
+      "grad_norm": 1.2162762932312685,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 11148
+    },
+    {
+      "epoch": 0.11149,
+      "grad_norm": 1.1639544600650762,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 11149
+    },
+    {
+      "epoch": 0.1115,
+      "grad_norm": 1.1006116288127763,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 11150
+    },
+    {
+      "epoch": 0.11151,
+      "grad_norm": 1.2672886873563878,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 11151
+    },
+    {
+      "epoch": 0.11152,
+      "grad_norm": 0.910987023246572,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 11152
+    },
+    {
+      "epoch": 0.11153,
+      "grad_norm": 1.101946526056744,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 11153
+    },
+    {
+      "epoch": 0.11154,
+      "grad_norm": 1.0947773638822873,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 11154
+    },
+    {
+      "epoch": 0.11155,
+      "grad_norm": 1.2702655690371203,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 11155
+    },
+    {
+      "epoch": 0.11156,
+      "grad_norm": 1.007909951916138,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 11156
+    },
+    {
+      "epoch": 0.11157,
+      "grad_norm": 1.3319173530862718,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 11157
+    },
+    {
+      "epoch": 0.11158,
+      "grad_norm": 0.9207828461156834,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 11158
+    },
+    {
+      "epoch": 0.11159,
+      "grad_norm": 1.197285438055082,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 11159
+    },
+    {
+      "epoch": 0.1116,
+      "grad_norm": 1.0433813495595061,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 11160
+    },
+    {
+      "epoch": 0.11161,
+      "grad_norm": 1.081328185673201,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 11161
+    },
+    {
+      "epoch": 0.11162,
+      "grad_norm": 1.2926210086878027,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 11162
+    },
+    {
+      "epoch": 0.11163,
+      "grad_norm": 0.9963164319297045,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 11163
+    },
+    {
+      "epoch": 0.11164,
+      "grad_norm": 1.3281076066936632,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 11164
+    },
+    {
+      "epoch": 0.11165,
+      "grad_norm": 0.9999767623058433,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 11165
+    },
+    {
+      "epoch": 0.11166,
+      "grad_norm": 1.3510910761006925,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 11166
+    },
+    {
+      "epoch": 0.11167,
+      "grad_norm": 0.9733408617008773,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 11167
+    },
+    {
+      "epoch": 0.11168,
+      "grad_norm": 1.247793495062047,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 11168
+    },
+    {
+      "epoch": 0.11169,
+      "grad_norm": 0.8953094218520867,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 11169
+    },
+    {
+      "epoch": 0.1117,
+      "grad_norm": 1.1780557955764552,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 11170
+    },
+    {
+      "epoch": 0.11171,
+      "grad_norm": 1.1811278342129226,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 11171
+    },
+    {
+      "epoch": 0.11172,
+      "grad_norm": 1.2689331311338854,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 11172
+    },
+    {
+      "epoch": 0.11173,
+      "grad_norm": 1.1683719180902132,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 11173
+    },
+    {
+      "epoch": 0.11174,
+      "grad_norm": 1.0094485713003973,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 11174
+    },
+    {
+      "epoch": 0.11175,
+      "grad_norm": 1.3250881012136548,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 11175
+    },
+    {
+      "epoch": 0.11176,
+      "grad_norm": 1.0043723462137841,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 11176
+    },
+    {
+      "epoch": 0.11177,
+      "grad_norm": 1.1386272400480386,
+      "learning_rate": 0.003,
+      "loss": 4.0564,
+      "step": 11177
+    },
+    {
+      "epoch": 0.11178,
+      "grad_norm": 1.2126593447984106,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 11178
+    },
+    {
+      "epoch": 0.11179,
+      "grad_norm": 1.2307687560202298,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 11179
+    },
+    {
+      "epoch": 0.1118,
+      "grad_norm": 1.1602217088708568,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 11180
+    },
+    {
+      "epoch": 0.11181,
+      "grad_norm": 1.057733354940848,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 11181
+    },
+    {
+      "epoch": 0.11182,
+      "grad_norm": 1.1720682053385487,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 11182
+    },
+    {
+      "epoch": 0.11183,
+      "grad_norm": 1.1436907781386656,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 11183
+    },
+    {
+      "epoch": 0.11184,
+      "grad_norm": 1.3870042277723742,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 11184
+    },
+    {
+      "epoch": 0.11185,
+      "grad_norm": 1.0936717631849686,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 11185
+    },
+    {
+      "epoch": 0.11186,
+      "grad_norm": 1.3772654953768122,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 11186
+    },
+    {
+      "epoch": 0.11187,
+      "grad_norm": 1.1790597204228255,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 11187
+    },
+    {
+      "epoch": 0.11188,
+      "grad_norm": 1.1073857604366244,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 11188
+    },
+    {
+      "epoch": 0.11189,
+      "grad_norm": 1.130277787761296,
+      "learning_rate": 0.003,
+      "loss": 4.0596,
+      "step": 11189
+    },
+    {
+      "epoch": 0.1119,
+      "grad_norm": 1.2313249191247733,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 11190
+    },
+    {
+      "epoch": 0.11191,
+      "grad_norm": 1.0068096645265725,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 11191
+    },
+    {
+      "epoch": 0.11192,
+      "grad_norm": 1.4926464646825244,
+      "learning_rate": 0.003,
+      "loss": 4.0641,
+      "step": 11192
+    },
+    {
+      "epoch": 0.11193,
+      "grad_norm": 1.075022433453773,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 11193
+    },
+    {
+      "epoch": 0.11194,
+      "grad_norm": 1.097191178968125,
+      "learning_rate": 0.003,
+      "loss": 4.0594,
+      "step": 11194
+    },
+    {
+      "epoch": 0.11195,
+      "grad_norm": 1.1810849460786332,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 11195
+    },
+    {
+      "epoch": 0.11196,
+      "grad_norm": 1.1503825490398842,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 11196
+    },
+    {
+      "epoch": 0.11197,
+      "grad_norm": 1.0421171730999512,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 11197
+    },
+    {
+      "epoch": 0.11198,
+      "grad_norm": 1.0783422809281649,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 11198
+    },
+    {
+      "epoch": 0.11199,
+      "grad_norm": 1.133756805734178,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 11199
+    },
+    {
+      "epoch": 0.112,
+      "grad_norm": 1.132914344900601,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 11200
+    },
+    {
+      "epoch": 0.11201,
+      "grad_norm": 1.3917889105852121,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 11201
+    },
+    {
+      "epoch": 0.11202,
+      "grad_norm": 0.9826420636753783,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 11202
+    },
+    {
+      "epoch": 0.11203,
+      "grad_norm": 1.35378770158763,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 11203
+    },
+    {
+      "epoch": 0.11204,
+      "grad_norm": 1.1631858332700105,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 11204
+    },
+    {
+      "epoch": 0.11205,
+      "grad_norm": 1.2921914277100586,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 11205
+    },
+    {
+      "epoch": 0.11206,
+      "grad_norm": 1.0898547835621788,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 11206
+    },
+    {
+      "epoch": 0.11207,
+      "grad_norm": 1.2652008774282912,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 11207
+    },
+    {
+      "epoch": 0.11208,
+      "grad_norm": 0.9934464327612474,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 11208
+    },
+    {
+      "epoch": 0.11209,
+      "grad_norm": 1.2684033075929582,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 11209
+    },
+    {
+      "epoch": 0.1121,
+      "grad_norm": 1.0481317867453344,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 11210
+    },
+    {
+      "epoch": 0.11211,
+      "grad_norm": 1.2804818902359865,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 11211
+    },
+    {
+      "epoch": 0.11212,
+      "grad_norm": 1.1905349539801156,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 11212
+    },
+    {
+      "epoch": 0.11213,
+      "grad_norm": 1.1425360307860808,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 11213
+    },
+    {
+      "epoch": 0.11214,
+      "grad_norm": 1.0912668598159907,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 11214
+    },
+    {
+      "epoch": 0.11215,
+      "grad_norm": 1.2077706236952057,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 11215
+    },
+    {
+      "epoch": 0.11216,
+      "grad_norm": 1.139473012377748,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 11216
+    },
+    {
+      "epoch": 0.11217,
+      "grad_norm": 1.2737949872278453,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 11217
+    },
+    {
+      "epoch": 0.11218,
+      "grad_norm": 1.1462612962051975,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 11218
+    },
+    {
+      "epoch": 0.11219,
+      "grad_norm": 1.0951419504062592,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 11219
+    },
+    {
+      "epoch": 0.1122,
+      "grad_norm": 1.4045206799945849,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 11220
+    },
+    {
+      "epoch": 0.11221,
+      "grad_norm": 0.950211704268758,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 11221
+    },
+    {
+      "epoch": 0.11222,
+      "grad_norm": 1.1123561273367195,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 11222
+    },
+    {
+      "epoch": 0.11223,
+      "grad_norm": 1.1138231860334924,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 11223
+    },
+    {
+      "epoch": 0.11224,
+      "grad_norm": 1.33255470843806,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 11224
+    },
+    {
+      "epoch": 0.11225,
+      "grad_norm": 1.0201189422663082,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 11225
+    },
+    {
+      "epoch": 0.11226,
+      "grad_norm": 1.2296700017941067,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 11226
+    },
+    {
+      "epoch": 0.11227,
+      "grad_norm": 1.2361560337024013,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 11227
+    },
+    {
+      "epoch": 0.11228,
+      "grad_norm": 1.359208100581836,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 11228
+    },
+    {
+      "epoch": 0.11229,
+      "grad_norm": 0.9721364087537138,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 11229
+    },
+    {
+      "epoch": 0.1123,
+      "grad_norm": 1.056450608774184,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 11230
+    },
+    {
+      "epoch": 0.11231,
+      "grad_norm": 1.1151470529423464,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 11231
+    },
+    {
+      "epoch": 0.11232,
+      "grad_norm": 1.349357130637738,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 11232
+    },
+    {
+      "epoch": 0.11233,
+      "grad_norm": 1.2692980366831477,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 11233
+    },
+    {
+      "epoch": 0.11234,
+      "grad_norm": 1.0600042315155727,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 11234
+    },
+    {
+      "epoch": 0.11235,
+      "grad_norm": 0.9730948794785325,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 11235
+    },
+    {
+      "epoch": 0.11236,
+      "grad_norm": 1.2005508877119877,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 11236
+    },
+    {
+      "epoch": 0.11237,
+      "grad_norm": 1.2147190669139891,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 11237
+    },
+    {
+      "epoch": 0.11238,
+      "grad_norm": 1.1338750404710822,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 11238
+    },
+    {
+      "epoch": 0.11239,
+      "grad_norm": 1.138170848080884,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 11239
+    },
+    {
+      "epoch": 0.1124,
+      "grad_norm": 1.031187833440196,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 11240
+    },
+    {
+      "epoch": 0.11241,
+      "grad_norm": 1.2789914501541244,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 11241
+    },
+    {
+      "epoch": 0.11242,
+      "grad_norm": 1.2236110455515656,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 11242
+    },
+    {
+      "epoch": 0.11243,
+      "grad_norm": 1.1093808126477966,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 11243
+    },
+    {
+      "epoch": 0.11244,
+      "grad_norm": 1.288173452117589,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 11244
+    },
+    {
+      "epoch": 0.11245,
+      "grad_norm": 1.0828260500666966,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 11245
+    },
+    {
+      "epoch": 0.11246,
+      "grad_norm": 1.1567543117559596,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 11246
+    },
+    {
+      "epoch": 0.11247,
+      "grad_norm": 1.3948568681025382,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 11247
+    },
+    {
+      "epoch": 0.11248,
+      "grad_norm": 1.0116804641863193,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 11248
+    },
+    {
+      "epoch": 0.11249,
+      "grad_norm": 1.3046509496910825,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 11249
+    },
+    {
+      "epoch": 0.1125,
+      "grad_norm": 0.9991437863954199,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 11250
+    },
+    {
+      "epoch": 0.11251,
+      "grad_norm": 1.6013468281261287,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 11251
+    },
+    {
+      "epoch": 0.11252,
+      "grad_norm": 0.8687536172748702,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 11252
+    },
+    {
+      "epoch": 0.11253,
+      "grad_norm": 1.0983958258583018,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 11253
+    },
+    {
+      "epoch": 0.11254,
+      "grad_norm": 1.226302806954953,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 11254
+    },
+    {
+      "epoch": 0.11255,
+      "grad_norm": 1.0662642116642365,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 11255
+    },
+    {
+      "epoch": 0.11256,
+      "grad_norm": 1.243463378805967,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 11256
+    },
+    {
+      "epoch": 0.11257,
+      "grad_norm": 1.0726761813569627,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 11257
+    },
+    {
+      "epoch": 0.11258,
+      "grad_norm": 1.101242624393574,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 11258
+    },
+    {
+      "epoch": 0.11259,
+      "grad_norm": 1.2600622231655116,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 11259
+    },
+    {
+      "epoch": 0.1126,
+      "grad_norm": 0.9980365205693852,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 11260
+    },
+    {
+      "epoch": 0.11261,
+      "grad_norm": 1.2636683024025575,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 11261
+    },
+    {
+      "epoch": 0.11262,
+      "grad_norm": 0.91784299165787,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 11262
+    },
+    {
+      "epoch": 0.11263,
+      "grad_norm": 1.2597297519227169,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 11263
+    },
+    {
+      "epoch": 0.11264,
+      "grad_norm": 1.1932659497244533,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 11264
+    },
+    {
+      "epoch": 0.11265,
+      "grad_norm": 0.9816454818813927,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 11265
+    },
+    {
+      "epoch": 0.11266,
+      "grad_norm": 1.2284969289133585,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 11266
+    },
+    {
+      "epoch": 0.11267,
+      "grad_norm": 1.1261765260002052,
+      "learning_rate": 0.003,
+      "loss": 4.0563,
+      "step": 11267
+    },
+    {
+      "epoch": 0.11268,
+      "grad_norm": 1.08799226517971,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 11268
+    },
+    {
+      "epoch": 0.11269,
+      "grad_norm": 1.1922244816252106,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 11269
+    },
+    {
+      "epoch": 0.1127,
+      "grad_norm": 0.9472450427826526,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 11270
+    },
+    {
+      "epoch": 0.11271,
+      "grad_norm": 1.1932608996712035,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 11271
+    },
+    {
+      "epoch": 0.11272,
+      "grad_norm": 1.1892191152480909,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 11272
+    },
+    {
+      "epoch": 0.11273,
+      "grad_norm": 1.2513453159910775,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 11273
+    },
+    {
+      "epoch": 0.11274,
+      "grad_norm": 1.2756382922479819,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 11274
+    },
+    {
+      "epoch": 0.11275,
+      "grad_norm": 1.0956380848789105,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 11275
+    },
+    {
+      "epoch": 0.11276,
+      "grad_norm": 1.166088340374118,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 11276
+    },
+    {
+      "epoch": 0.11277,
+      "grad_norm": 0.9706800035472809,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 11277
+    },
+    {
+      "epoch": 0.11278,
+      "grad_norm": 1.277416659504229,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 11278
+    },
+    {
+      "epoch": 0.11279,
+      "grad_norm": 1.1485716440991571,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 11279
+    },
+    {
+      "epoch": 0.1128,
+      "grad_norm": 1.2519360307904746,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 11280
+    },
+    {
+      "epoch": 0.11281,
+      "grad_norm": 1.1269013514569461,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 11281
+    },
+    {
+      "epoch": 0.11282,
+      "grad_norm": 1.2633224979713635,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 11282
+    },
+    {
+      "epoch": 0.11283,
+      "grad_norm": 0.9853227805426616,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 11283
+    },
+    {
+      "epoch": 0.11284,
+      "grad_norm": 1.3468725308834935,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 11284
+    },
+    {
+      "epoch": 0.11285,
+      "grad_norm": 0.9626469410998565,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 11285
+    },
+    {
+      "epoch": 0.11286,
+      "grad_norm": 1.2746488595767842,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 11286
+    },
+    {
+      "epoch": 0.11287,
+      "grad_norm": 1.023668641008112,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 11287
+    },
+    {
+      "epoch": 0.11288,
+      "grad_norm": 1.2629926336246062,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 11288
+    },
+    {
+      "epoch": 0.11289,
+      "grad_norm": 1.1125863518225823,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 11289
+    },
+    {
+      "epoch": 0.1129,
+      "grad_norm": 1.2085679726727696,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 11290
+    },
+    {
+      "epoch": 0.11291,
+      "grad_norm": 1.020769473245544,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 11291
+    },
+    {
+      "epoch": 0.11292,
+      "grad_norm": 1.5500356961334352,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 11292
+    },
+    {
+      "epoch": 0.11293,
+      "grad_norm": 1.1685657842109745,
+      "learning_rate": 0.003,
+      "loss": 4.0586,
+      "step": 11293
+    },
+    {
+      "epoch": 0.11294,
+      "grad_norm": 1.3386643011461636,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 11294
+    },
+    {
+      "epoch": 0.11295,
+      "grad_norm": 1.1580125532963097,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 11295
+    },
+    {
+      "epoch": 0.11296,
+      "grad_norm": 1.0489120902824944,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 11296
+    },
+    {
+      "epoch": 0.11297,
+      "grad_norm": 1.103839461492198,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 11297
+    },
+    {
+      "epoch": 0.11298,
+      "grad_norm": 1.2294443402904203,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 11298
+    },
+    {
+      "epoch": 0.11299,
+      "grad_norm": 1.2428348430774223,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 11299
+    },
+    {
+      "epoch": 0.113,
+      "grad_norm": 1.188214219956524,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 11300
+    },
+    {
+      "epoch": 0.11301,
+      "grad_norm": 1.091582106196238,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 11301
+    },
+    {
+      "epoch": 0.11302,
+      "grad_norm": 0.9288460311574318,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 11302
+    },
+    {
+      "epoch": 0.11303,
+      "grad_norm": 1.4007101429225426,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 11303
+    },
+    {
+      "epoch": 0.11304,
+      "grad_norm": 1.0982722121566038,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 11304
+    },
+    {
+      "epoch": 0.11305,
+      "grad_norm": 1.2279359120748694,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 11305
+    },
+    {
+      "epoch": 0.11306,
+      "grad_norm": 1.0604973608241537,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 11306
+    },
+    {
+      "epoch": 0.11307,
+      "grad_norm": 1.1270170083873274,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 11307
+    },
+    {
+      "epoch": 0.11308,
+      "grad_norm": 1.0777135494150742,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 11308
+    },
+    {
+      "epoch": 0.11309,
+      "grad_norm": 1.3544518294662256,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 11309
+    },
+    {
+      "epoch": 0.1131,
+      "grad_norm": 0.9727494707141395,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 11310
+    },
+    {
+      "epoch": 0.11311,
+      "grad_norm": 1.3272901030217268,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 11311
+    },
+    {
+      "epoch": 0.11312,
+      "grad_norm": 1.2076378589872812,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 11312
+    },
+    {
+      "epoch": 0.11313,
+      "grad_norm": 1.1833510307690753,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 11313
+    },
+    {
+      "epoch": 0.11314,
+      "grad_norm": 1.2380294898068176,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 11314
+    },
+    {
+      "epoch": 0.11315,
+      "grad_norm": 0.9562005645157645,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 11315
+    },
+    {
+      "epoch": 0.11316,
+      "grad_norm": 1.2025637701941305,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 11316
+    },
+    {
+      "epoch": 0.11317,
+      "grad_norm": 1.1779191244129885,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 11317
+    },
+    {
+      "epoch": 0.11318,
+      "grad_norm": 1.2236972961058097,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 11318
+    },
+    {
+      "epoch": 0.11319,
+      "grad_norm": 1.0549576319959617,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 11319
+    },
+    {
+      "epoch": 0.1132,
+      "grad_norm": 1.2105786067689994,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 11320
+    },
+    {
+      "epoch": 0.11321,
+      "grad_norm": 1.3397052436576664,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 11321
+    },
+    {
+      "epoch": 0.11322,
+      "grad_norm": 1.2893426572692654,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 11322
+    },
+    {
+      "epoch": 0.11323,
+      "grad_norm": 0.9407951953734294,
+      "learning_rate": 0.003,
+      "loss": 3.9886,
+      "step": 11323
+    },
+    {
+      "epoch": 0.11324,
+      "grad_norm": 1.177515193019718,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 11324
+    },
+    {
+      "epoch": 0.11325,
+      "grad_norm": 1.2549973557620873,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 11325
+    },
+    {
+      "epoch": 0.11326,
+      "grad_norm": 1.0497216278305863,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 11326
+    },
+    {
+      "epoch": 0.11327,
+      "grad_norm": 1.2211928944892607,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 11327
+    },
+    {
+      "epoch": 0.11328,
+      "grad_norm": 1.1575302001609644,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 11328
+    },
+    {
+      "epoch": 0.11329,
+      "grad_norm": 1.353259362168543,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 11329
+    },
+    {
+      "epoch": 0.1133,
+      "grad_norm": 1.1673317612996932,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 11330
+    },
+    {
+      "epoch": 0.11331,
+      "grad_norm": 1.110344451425524,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 11331
+    },
+    {
+      "epoch": 0.11332,
+      "grad_norm": 0.9832494841416515,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 11332
+    },
+    {
+      "epoch": 0.11333,
+      "grad_norm": 1.1341803958872791,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 11333
+    },
+    {
+      "epoch": 0.11334,
+      "grad_norm": 1.0713985717412855,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 11334
+    },
+    {
+      "epoch": 0.11335,
+      "grad_norm": 1.4947599748518885,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 11335
+    },
+    {
+      "epoch": 0.11336,
+      "grad_norm": 1.0176108077782644,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 11336
+    },
+    {
+      "epoch": 0.11337,
+      "grad_norm": 1.0630112841599184,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 11337
+    },
+    {
+      "epoch": 0.11338,
+      "grad_norm": 1.0818296952259,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 11338
+    },
+    {
+      "epoch": 0.11339,
+      "grad_norm": 1.2490039926297583,
+      "learning_rate": 0.003,
+      "loss": 4.0659,
+      "step": 11339
+    },
+    {
+      "epoch": 0.1134,
+      "grad_norm": 1.094214947697734,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 11340
+    },
+    {
+      "epoch": 0.11341,
+      "grad_norm": 1.1634860925483297,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 11341
+    },
+    {
+      "epoch": 0.11342,
+      "grad_norm": 1.1087200576928156,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 11342
+    },
+    {
+      "epoch": 0.11343,
+      "grad_norm": 1.0264504151598612,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 11343
+    },
+    {
+      "epoch": 0.11344,
+      "grad_norm": 1.1105495732679151,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 11344
+    },
+    {
+      "epoch": 0.11345,
+      "grad_norm": 0.94706032963158,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 11345
+    },
+    {
+      "epoch": 0.11346,
+      "grad_norm": 1.2356018822595631,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 11346
+    },
+    {
+      "epoch": 0.11347,
+      "grad_norm": 1.1999112899120898,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 11347
+    },
+    {
+      "epoch": 0.11348,
+      "grad_norm": 1.123422414749545,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 11348
+    },
+    {
+      "epoch": 0.11349,
+      "grad_norm": 1.1936480899464423,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 11349
+    },
+    {
+      "epoch": 0.1135,
+      "grad_norm": 1.3146543934382486,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 11350
+    },
+    {
+      "epoch": 0.11351,
+      "grad_norm": 1.2860271723469208,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 11351
+    },
+    {
+      "epoch": 0.11352,
+      "grad_norm": 1.0410866520299848,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 11352
+    },
+    {
+      "epoch": 0.11353,
+      "grad_norm": 1.09507802259332,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 11353
+    },
+    {
+      "epoch": 0.11354,
+      "grad_norm": 1.2049447545318868,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 11354
+    },
+    {
+      "epoch": 0.11355,
+      "grad_norm": 1.1115701247127443,
+      "learning_rate": 0.003,
+      "loss": 4.0624,
+      "step": 11355
+    },
+    {
+      "epoch": 0.11356,
+      "grad_norm": 1.235219941310057,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 11356
+    },
+    {
+      "epoch": 0.11357,
+      "grad_norm": 1.165360102724017,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 11357
+    },
+    {
+      "epoch": 0.11358,
+      "grad_norm": 1.3339213697650871,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 11358
+    },
+    {
+      "epoch": 0.11359,
+      "grad_norm": 0.972211541313835,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 11359
+    },
+    {
+      "epoch": 0.1136,
+      "grad_norm": 1.0879757952074331,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 11360
+    },
+    {
+      "epoch": 0.11361,
+      "grad_norm": 1.065115109105123,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 11361
+    },
+    {
+      "epoch": 0.11362,
+      "grad_norm": 1.24872681243813,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 11362
+    },
+    {
+      "epoch": 0.11363,
+      "grad_norm": 1.072079099300088,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 11363
+    },
+    {
+      "epoch": 0.11364,
+      "grad_norm": 1.055551473984547,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 11364
+    },
+    {
+      "epoch": 0.11365,
+      "grad_norm": 1.2328441149042377,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 11365
+    },
+    {
+      "epoch": 0.11366,
+      "grad_norm": 1.0839732791788177,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 11366
+    },
+    {
+      "epoch": 0.11367,
+      "grad_norm": 1.1584220106389211,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 11367
+    },
+    {
+      "epoch": 0.11368,
+      "grad_norm": 1.0721506976287616,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 11368
+    },
+    {
+      "epoch": 0.11369,
+      "grad_norm": 1.3646659075049767,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 11369
+    },
+    {
+      "epoch": 0.1137,
+      "grad_norm": 1.1322442704014541,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 11370
+    },
+    {
+      "epoch": 0.11371,
+      "grad_norm": 1.0558050074955512,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 11371
+    },
+    {
+      "epoch": 0.11372,
+      "grad_norm": 1.1301617951796612,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 11372
+    },
+    {
+      "epoch": 0.11373,
+      "grad_norm": 0.9812153815857593,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 11373
+    },
+    {
+      "epoch": 0.11374,
+      "grad_norm": 1.359681063168288,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 11374
+    },
+    {
+      "epoch": 0.11375,
+      "grad_norm": 1.1989683827617195,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 11375
+    },
+    {
+      "epoch": 0.11376,
+      "grad_norm": 0.9981750577047487,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 11376
+    },
+    {
+      "epoch": 0.11377,
+      "grad_norm": 1.2247933958516897,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 11377
+    },
+    {
+      "epoch": 0.11378,
+      "grad_norm": 1.219591085402563,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 11378
+    },
+    {
+      "epoch": 0.11379,
+      "grad_norm": 1.1739864083872635,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 11379
+    },
+    {
+      "epoch": 0.1138,
+      "grad_norm": 1.1150848974614544,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 11380
+    },
+    {
+      "epoch": 0.11381,
+      "grad_norm": 1.32267660459148,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 11381
+    },
+    {
+      "epoch": 0.11382,
+      "grad_norm": 1.19364357962154,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 11382
+    },
+    {
+      "epoch": 0.11383,
+      "grad_norm": 1.1210298550024658,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 11383
+    },
+    {
+      "epoch": 0.11384,
+      "grad_norm": 1.2945719303722119,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 11384
+    },
+    {
+      "epoch": 0.11385,
+      "grad_norm": 0.8328895137306691,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 11385
+    },
+    {
+      "epoch": 0.11386,
+      "grad_norm": 0.9686545135829541,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 11386
+    },
+    {
+      "epoch": 0.11387,
+      "grad_norm": 1.3533951425487776,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 11387
+    },
+    {
+      "epoch": 0.11388,
+      "grad_norm": 0.9410384710036733,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 11388
+    },
+    {
+      "epoch": 0.11389,
+      "grad_norm": 1.2925355506565999,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 11389
+    },
+    {
+      "epoch": 0.1139,
+      "grad_norm": 1.187327748759508,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 11390
+    },
+    {
+      "epoch": 0.11391,
+      "grad_norm": 1.2938651872263744,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 11391
+    },
+    {
+      "epoch": 0.11392,
+      "grad_norm": 1.1411249301402717,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 11392
+    },
+    {
+      "epoch": 0.11393,
+      "grad_norm": 1.1337183382980245,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 11393
+    },
+    {
+      "epoch": 0.11394,
+      "grad_norm": 1.0889052885198094,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 11394
+    },
+    {
+      "epoch": 0.11395,
+      "grad_norm": 1.1071420401026428,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 11395
+    },
+    {
+      "epoch": 0.11396,
+      "grad_norm": 0.9858679783214278,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 11396
+    },
+    {
+      "epoch": 0.11397,
+      "grad_norm": 1.2081766368555051,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 11397
+    },
+    {
+      "epoch": 0.11398,
+      "grad_norm": 1.1904796842320102,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 11398
+    },
+    {
+      "epoch": 0.11399,
+      "grad_norm": 1.1652838732036046,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 11399
+    },
+    {
+      "epoch": 0.114,
+      "grad_norm": 1.1994199478656578,
+      "learning_rate": 0.003,
+      "loss": 4.0538,
+      "step": 11400
+    },
+    {
+      "epoch": 0.11401,
+      "grad_norm": 1.0582784467349977,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 11401
+    },
+    {
+      "epoch": 0.11402,
+      "grad_norm": 1.1358520028873116,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 11402
+    },
+    {
+      "epoch": 0.11403,
+      "grad_norm": 1.0954439597350976,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 11403
+    },
+    {
+      "epoch": 0.11404,
+      "grad_norm": 1.2934994659161199,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 11404
+    },
+    {
+      "epoch": 0.11405,
+      "grad_norm": 1.1821009379113836,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 11405
+    },
+    {
+      "epoch": 0.11406,
+      "grad_norm": 0.9323614807006916,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 11406
+    },
+    {
+      "epoch": 0.11407,
+      "grad_norm": 1.044288690455784,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 11407
+    },
+    {
+      "epoch": 0.11408,
+      "grad_norm": 1.288371712660551,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 11408
+    },
+    {
+      "epoch": 0.11409,
+      "grad_norm": 1.1013793581481566,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 11409
+    },
+    {
+      "epoch": 0.1141,
+      "grad_norm": 1.2399270262890425,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 11410
+    },
+    {
+      "epoch": 0.11411,
+      "grad_norm": 1.198910800893651,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 11411
+    },
+    {
+      "epoch": 0.11412,
+      "grad_norm": 1.2722748826966932,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 11412
+    },
+    {
+      "epoch": 0.11413,
+      "grad_norm": 1.2237602676428068,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 11413
+    },
+    {
+      "epoch": 0.11414,
+      "grad_norm": 1.054751554499624,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 11414
+    },
+    {
+      "epoch": 0.11415,
+      "grad_norm": 1.3859608251730753,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 11415
+    },
+    {
+      "epoch": 0.11416,
+      "grad_norm": 0.9023833580320342,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 11416
+    },
+    {
+      "epoch": 0.11417,
+      "grad_norm": 1.163717032717626,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 11417
+    },
+    {
+      "epoch": 0.11418,
+      "grad_norm": 1.1635066782698764,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 11418
+    },
+    {
+      "epoch": 0.11419,
+      "grad_norm": 1.2785442673812115,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 11419
+    },
+    {
+      "epoch": 0.1142,
+      "grad_norm": 1.1457574905694237,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 11420
+    },
+    {
+      "epoch": 0.11421,
+      "grad_norm": 1.2404496422928504,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 11421
+    },
+    {
+      "epoch": 0.11422,
+      "grad_norm": 0.9198614788403456,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 11422
+    },
+    {
+      "epoch": 0.11423,
+      "grad_norm": 0.9771207760414389,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 11423
+    },
+    {
+      "epoch": 0.11424,
+      "grad_norm": 1.0105210455470817,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 11424
+    },
+    {
+      "epoch": 0.11425,
+      "grad_norm": 1.209588882831824,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 11425
+    },
+    {
+      "epoch": 0.11426,
+      "grad_norm": 1.0050650721043324,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 11426
+    },
+    {
+      "epoch": 0.11427,
+      "grad_norm": 1.2129381435966735,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 11427
+    },
+    {
+      "epoch": 0.11428,
+      "grad_norm": 1.1725659285639696,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 11428
+    },
+    {
+      "epoch": 0.11429,
+      "grad_norm": 1.3772603501943919,
+      "learning_rate": 0.003,
+      "loss": 4.0633,
+      "step": 11429
+    },
+    {
+      "epoch": 0.1143,
+      "grad_norm": 1.1189640923482154,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 11430
+    },
+    {
+      "epoch": 0.11431,
+      "grad_norm": 1.0788431637790905,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 11431
+    },
+    {
+      "epoch": 0.11432,
+      "grad_norm": 1.3054445233916165,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 11432
+    },
+    {
+      "epoch": 0.11433,
+      "grad_norm": 1.0880911650168383,
+      "learning_rate": 0.003,
+      "loss": 4.0626,
+      "step": 11433
+    },
+    {
+      "epoch": 0.11434,
+      "grad_norm": 1.1337028141735888,
+      "learning_rate": 0.003,
+      "loss": 4.0559,
+      "step": 11434
+    },
+    {
+      "epoch": 0.11435,
+      "grad_norm": 1.1674027454792053,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 11435
+    },
+    {
+      "epoch": 0.11436,
+      "grad_norm": 1.0782480965612236,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 11436
+    },
+    {
+      "epoch": 0.11437,
+      "grad_norm": 1.0494128681386032,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 11437
+    },
+    {
+      "epoch": 0.11438,
+      "grad_norm": 1.2824338283149883,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 11438
+    },
+    {
+      "epoch": 0.11439,
+      "grad_norm": 1.1874792640867795,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 11439
+    },
+    {
+      "epoch": 0.1144,
+      "grad_norm": 1.1122036447087196,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 11440
+    },
+    {
+      "epoch": 0.11441,
+      "grad_norm": 1.190227594977014,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 11441
+    },
+    {
+      "epoch": 0.11442,
+      "grad_norm": 1.270616577468654,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 11442
+    },
+    {
+      "epoch": 0.11443,
+      "grad_norm": 1.3070052964269097,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 11443
+    },
+    {
+      "epoch": 0.11444,
+      "grad_norm": 1.03052198866508,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 11444
+    },
+    {
+      "epoch": 0.11445,
+      "grad_norm": 1.1390169230493974,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 11445
+    },
+    {
+      "epoch": 0.11446,
+      "grad_norm": 1.3887008590353196,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 11446
+    },
+    {
+      "epoch": 0.11447,
+      "grad_norm": 1.3046931809126903,
+      "learning_rate": 0.003,
+      "loss": 4.0671,
+      "step": 11447
+    },
+    {
+      "epoch": 0.11448,
+      "grad_norm": 1.1339224847291296,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 11448
+    },
+    {
+      "epoch": 0.11449,
+      "grad_norm": 1.2471236226671056,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 11449
+    },
+    {
+      "epoch": 0.1145,
+      "grad_norm": 1.1731443345300483,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 11450
+    },
+    {
+      "epoch": 0.11451,
+      "grad_norm": 1.051150483846822,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 11451
+    },
+    {
+      "epoch": 0.11452,
+      "grad_norm": 1.2092227172081997,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 11452
+    },
+    {
+      "epoch": 0.11453,
+      "grad_norm": 1.167343479702236,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 11453
+    },
+    {
+      "epoch": 0.11454,
+      "grad_norm": 1.3660105566993612,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 11454
+    },
+    {
+      "epoch": 0.11455,
+      "grad_norm": 1.1629557310949954,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 11455
+    },
+    {
+      "epoch": 0.11456,
+      "grad_norm": 1.1739663234451223,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 11456
+    },
+    {
+      "epoch": 0.11457,
+      "grad_norm": 1.1156521901007033,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 11457
+    },
+    {
+      "epoch": 0.11458,
+      "grad_norm": 1.2592133833338512,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 11458
+    },
+    {
+      "epoch": 0.11459,
+      "grad_norm": 0.9615728049014559,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 11459
+    },
+    {
+      "epoch": 0.1146,
+      "grad_norm": 1.2024834313430042,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 11460
+    },
+    {
+      "epoch": 0.11461,
+      "grad_norm": 1.2257514801570328,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 11461
+    },
+    {
+      "epoch": 0.11462,
+      "grad_norm": 1.0895605253494356,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 11462
+    },
+    {
+      "epoch": 0.11463,
+      "grad_norm": 1.1193074344631706,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 11463
+    },
+    {
+      "epoch": 0.11464,
+      "grad_norm": 1.294479238346295,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 11464
+    },
+    {
+      "epoch": 0.11465,
+      "grad_norm": 1.2104511450000375,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 11465
+    },
+    {
+      "epoch": 0.11466,
+      "grad_norm": 1.3503066663860102,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 11466
+    },
+    {
+      "epoch": 0.11467,
+      "grad_norm": 1.0443750276791484,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 11467
+    },
+    {
+      "epoch": 0.11468,
+      "grad_norm": 1.0505892359693176,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 11468
+    },
+    {
+      "epoch": 0.11469,
+      "grad_norm": 1.2825643225762613,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 11469
+    },
+    {
+      "epoch": 0.1147,
+      "grad_norm": 1.0516531724729168,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 11470
+    },
+    {
+      "epoch": 0.11471,
+      "grad_norm": 1.3346428910912866,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 11471
+    },
+    {
+      "epoch": 0.11472,
+      "grad_norm": 1.095097921047335,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 11472
+    },
+    {
+      "epoch": 0.11473,
+      "grad_norm": 1.0698418013171735,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 11473
+    },
+    {
+      "epoch": 0.11474,
+      "grad_norm": 1.248084189275581,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 11474
+    },
+    {
+      "epoch": 0.11475,
+      "grad_norm": 1.021325495043658,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 11475
+    },
+    {
+      "epoch": 0.11476,
+      "grad_norm": 1.1909200855488131,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 11476
+    },
+    {
+      "epoch": 0.11477,
+      "grad_norm": 0.8421493513707902,
+      "learning_rate": 0.003,
+      "loss": 4.0524,
+      "step": 11477
+    },
+    {
+      "epoch": 0.11478,
+      "grad_norm": 1.0695750946042661,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 11478
+    },
+    {
+      "epoch": 0.11479,
+      "grad_norm": 1.2622542869695088,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 11479
+    },
+    {
+      "epoch": 0.1148,
+      "grad_norm": 1.092292616556263,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 11480
+    },
+    {
+      "epoch": 0.11481,
+      "grad_norm": 1.145024396755148,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 11481
+    },
+    {
+      "epoch": 0.11482,
+      "grad_norm": 1.2473154146510699,
+      "learning_rate": 0.003,
+      "loss": 4.0604,
+      "step": 11482
+    },
+    {
+      "epoch": 0.11483,
+      "grad_norm": 1.2566705438815857,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 11483
+    },
+    {
+      "epoch": 0.11484,
+      "grad_norm": 1.0349608438987377,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 11484
+    },
+    {
+      "epoch": 0.11485,
+      "grad_norm": 1.4355317012427342,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 11485
+    },
+    {
+      "epoch": 0.11486,
+      "grad_norm": 0.9326495474339838,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 11486
+    },
+    {
+      "epoch": 0.11487,
+      "grad_norm": 1.192660410494905,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 11487
+    },
+    {
+      "epoch": 0.11488,
+      "grad_norm": 1.2529854252829626,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 11488
+    },
+    {
+      "epoch": 0.11489,
+      "grad_norm": 1.089982573108441,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 11489
+    },
+    {
+      "epoch": 0.1149,
+      "grad_norm": 1.0719423276675382,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 11490
+    },
+    {
+      "epoch": 0.11491,
+      "grad_norm": 1.2350327715936045,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 11491
+    },
+    {
+      "epoch": 0.11492,
+      "grad_norm": 1.031561706435445,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 11492
+    },
+    {
+      "epoch": 0.11493,
+      "grad_norm": 1.2174218874516956,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 11493
+    },
+    {
+      "epoch": 0.11494,
+      "grad_norm": 1.1968611290413345,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 11494
+    },
+    {
+      "epoch": 0.11495,
+      "grad_norm": 1.0535886969774535,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 11495
+    },
+    {
+      "epoch": 0.11496,
+      "grad_norm": 1.3975302549169397,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 11496
+    },
+    {
+      "epoch": 0.11497,
+      "grad_norm": 1.2029400300955522,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 11497
+    },
+    {
+      "epoch": 0.11498,
+      "grad_norm": 1.234457060439203,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 11498
+    },
+    {
+      "epoch": 0.11499,
+      "grad_norm": 1.1060078725145015,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 11499
+    },
+    {
+      "epoch": 0.115,
+      "grad_norm": 1.2756853936300074,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 11500
+    },
+    {
+      "epoch": 0.11501,
+      "grad_norm": 1.2133184029419521,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 11501
+    },
+    {
+      "epoch": 0.11502,
+      "grad_norm": 1.0818298149149148,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 11502
+    },
+    {
+      "epoch": 0.11503,
+      "grad_norm": 1.2728423553765829,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 11503
+    },
+    {
+      "epoch": 0.11504,
+      "grad_norm": 1.059498569959539,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 11504
+    },
+    {
+      "epoch": 0.11505,
+      "grad_norm": 1.35683834617117,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 11505
+    },
+    {
+      "epoch": 0.11506,
+      "grad_norm": 0.9413672569378154,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 11506
+    },
+    {
+      "epoch": 0.11507,
+      "grad_norm": 0.9314304015475336,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 11507
+    },
+    {
+      "epoch": 0.11508,
+      "grad_norm": 1.2504519343724296,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 11508
+    },
+    {
+      "epoch": 0.11509,
+      "grad_norm": 1.151992191983872,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 11509
+    },
+    {
+      "epoch": 0.1151,
+      "grad_norm": 1.3329441485185418,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 11510
+    },
+    {
+      "epoch": 0.11511,
+      "grad_norm": 1.0288130767327575,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 11511
+    },
+    {
+      "epoch": 0.11512,
+      "grad_norm": 1.2555285931550986,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 11512
+    },
+    {
+      "epoch": 0.11513,
+      "grad_norm": 1.0795969059027257,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 11513
+    },
+    {
+      "epoch": 0.11514,
+      "grad_norm": 1.2133450305538067,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 11514
+    },
+    {
+      "epoch": 0.11515,
+      "grad_norm": 1.0929272209567709,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 11515
+    },
+    {
+      "epoch": 0.11516,
+      "grad_norm": 1.193595963865107,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 11516
+    },
+    {
+      "epoch": 0.11517,
+      "grad_norm": 1.1423550823219821,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 11517
+    },
+    {
+      "epoch": 0.11518,
+      "grad_norm": 1.2145234983955724,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 11518
+    },
+    {
+      "epoch": 0.11519,
+      "grad_norm": 1.1485471374131204,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 11519
+    },
+    {
+      "epoch": 0.1152,
+      "grad_norm": 1.1180316109358304,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 11520
+    },
+    {
+      "epoch": 0.11521,
+      "grad_norm": 1.2276448661153612,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 11521
+    },
+    {
+      "epoch": 0.11522,
+      "grad_norm": 1.175171062022422,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 11522
+    },
+    {
+      "epoch": 0.11523,
+      "grad_norm": 1.3095154968379423,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 11523
+    },
+    {
+      "epoch": 0.11524,
+      "grad_norm": 1.1840175023993502,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 11524
+    },
+    {
+      "epoch": 0.11525,
+      "grad_norm": 1.2260241036155453,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 11525
+    },
+    {
+      "epoch": 0.11526,
+      "grad_norm": 1.2519788784999393,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 11526
+    },
+    {
+      "epoch": 0.11527,
+      "grad_norm": 1.2082041366767022,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 11527
+    },
+    {
+      "epoch": 0.11528,
+      "grad_norm": 1.070100717213944,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 11528
+    },
+    {
+      "epoch": 0.11529,
+      "grad_norm": 1.0301876336933986,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 11529
+    },
+    {
+      "epoch": 0.1153,
+      "grad_norm": 1.1693678818554514,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 11530
+    },
+    {
+      "epoch": 0.11531,
+      "grad_norm": 1.0247944374771782,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 11531
+    },
+    {
+      "epoch": 0.11532,
+      "grad_norm": 1.274822947052742,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 11532
+    },
+    {
+      "epoch": 0.11533,
+      "grad_norm": 0.9507147171376193,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 11533
+    },
+    {
+      "epoch": 0.11534,
+      "grad_norm": 1.3131155527752056,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 11534
+    },
+    {
+      "epoch": 0.11535,
+      "grad_norm": 1.1716121524993672,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 11535
+    },
+    {
+      "epoch": 0.11536,
+      "grad_norm": 1.3964603557745452,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 11536
+    },
+    {
+      "epoch": 0.11537,
+      "grad_norm": 1.0643667779288306,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 11537
+    },
+    {
+      "epoch": 0.11538,
+      "grad_norm": 1.0516657590339504,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 11538
+    },
+    {
+      "epoch": 0.11539,
+      "grad_norm": 1.348470067591403,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 11539
+    },
+    {
+      "epoch": 0.1154,
+      "grad_norm": 0.9565756930424267,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 11540
+    },
+    {
+      "epoch": 0.11541,
+      "grad_norm": 1.2425855920676405,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 11541
+    },
+    {
+      "epoch": 0.11542,
+      "grad_norm": 0.9444027602069982,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 11542
+    },
+    {
+      "epoch": 0.11543,
+      "grad_norm": 1.121214808300316,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 11543
+    },
+    {
+      "epoch": 0.11544,
+      "grad_norm": 1.1647380685948656,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 11544
+    },
+    {
+      "epoch": 0.11545,
+      "grad_norm": 1.0922646545602452,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 11545
+    },
+    {
+      "epoch": 0.11546,
+      "grad_norm": 1.1959237319583436,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 11546
+    },
+    {
+      "epoch": 0.11547,
+      "grad_norm": 1.2168385051277104,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 11547
+    },
+    {
+      "epoch": 0.11548,
+      "grad_norm": 0.9906434040772247,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 11548
+    },
+    {
+      "epoch": 0.11549,
+      "grad_norm": 1.399388889519621,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 11549
+    },
+    {
+      "epoch": 0.1155,
+      "grad_norm": 1.1691281692028388,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 11550
+    },
+    {
+      "epoch": 0.11551,
+      "grad_norm": 1.2610503080435684,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 11551
+    },
+    {
+      "epoch": 0.11552,
+      "grad_norm": 1.0462781610867442,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 11552
+    },
+    {
+      "epoch": 0.11553,
+      "grad_norm": 1.3027344485563248,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 11553
+    },
+    {
+      "epoch": 0.11554,
+      "grad_norm": 0.9956780270740784,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 11554
+    },
+    {
+      "epoch": 0.11555,
+      "grad_norm": 1.4322465943251208,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 11555
+    },
+    {
+      "epoch": 0.11556,
+      "grad_norm": 1.0957293213169021,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 11556
+    },
+    {
+      "epoch": 0.11557,
+      "grad_norm": 1.3308831178294134,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 11557
+    },
+    {
+      "epoch": 0.11558,
+      "grad_norm": 1.1715374522210913,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 11558
+    },
+    {
+      "epoch": 0.11559,
+      "grad_norm": 1.0505976322706607,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 11559
+    },
+    {
+      "epoch": 0.1156,
+      "grad_norm": 1.1380918415657428,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 11560
+    },
+    {
+      "epoch": 0.11561,
+      "grad_norm": 1.1609609825263985,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 11561
+    },
+    {
+      "epoch": 0.11562,
+      "grad_norm": 1.3050344178576219,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 11562
+    },
+    {
+      "epoch": 0.11563,
+      "grad_norm": 0.9380550906728504,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 11563
+    },
+    {
+      "epoch": 0.11564,
+      "grad_norm": 1.1946440621831265,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 11564
+    },
+    {
+      "epoch": 0.11565,
+      "grad_norm": 1.3080572257976502,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 11565
+    },
+    {
+      "epoch": 0.11566,
+      "grad_norm": 1.250707143374676,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 11566
+    },
+    {
+      "epoch": 0.11567,
+      "grad_norm": 1.2311864131520067,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 11567
+    },
+    {
+      "epoch": 0.11568,
+      "grad_norm": 1.311111463929809,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 11568
+    },
+    {
+      "epoch": 0.11569,
+      "grad_norm": 0.8868310857413003,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 11569
+    },
+    {
+      "epoch": 0.1157,
+      "grad_norm": 1.1027450881292422,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 11570
+    },
+    {
+      "epoch": 0.11571,
+      "grad_norm": 1.1161081563397963,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 11571
+    },
+    {
+      "epoch": 0.11572,
+      "grad_norm": 0.9089775562151213,
+      "learning_rate": 0.003,
+      "loss": 3.976,
+      "step": 11572
+    },
+    {
+      "epoch": 0.11573,
+      "grad_norm": 1.003694953680766,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 11573
+    },
+    {
+      "epoch": 0.11574,
+      "grad_norm": 1.3694504994237875,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 11574
+    },
+    {
+      "epoch": 0.11575,
+      "grad_norm": 1.1741369881532855,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 11575
+    },
+    {
+      "epoch": 0.11576,
+      "grad_norm": 1.436922790332464,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 11576
+    },
+    {
+      "epoch": 0.11577,
+      "grad_norm": 0.953413832089103,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 11577
+    },
+    {
+      "epoch": 0.11578,
+      "grad_norm": 1.0815315504459275,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 11578
+    },
+    {
+      "epoch": 0.11579,
+      "grad_norm": 1.331353293169261,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 11579
+    },
+    {
+      "epoch": 0.1158,
+      "grad_norm": 1.2014803447115574,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 11580
+    },
+    {
+      "epoch": 0.11581,
+      "grad_norm": 1.307264485319865,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 11581
+    },
+    {
+      "epoch": 0.11582,
+      "grad_norm": 1.0474446025518378,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 11582
+    },
+    {
+      "epoch": 0.11583,
+      "grad_norm": 1.1339925874103132,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 11583
+    },
+    {
+      "epoch": 0.11584,
+      "grad_norm": 1.0857258271985775,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 11584
+    },
+    {
+      "epoch": 0.11585,
+      "grad_norm": 1.2405218315074111,
+      "learning_rate": 0.003,
+      "loss": 4.0572,
+      "step": 11585
+    },
+    {
+      "epoch": 0.11586,
+      "grad_norm": 1.14159513185874,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 11586
+    },
+    {
+      "epoch": 0.11587,
+      "grad_norm": 1.171361791538229,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 11587
+    },
+    {
+      "epoch": 0.11588,
+      "grad_norm": 0.8984976334031136,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 11588
+    },
+    {
+      "epoch": 0.11589,
+      "grad_norm": 1.0483798909522963,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 11589
+    },
+    {
+      "epoch": 0.1159,
+      "grad_norm": 1.3752191283722144,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 11590
+    },
+    {
+      "epoch": 0.11591,
+      "grad_norm": 0.9271251406957484,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 11591
+    },
+    {
+      "epoch": 0.11592,
+      "grad_norm": 1.1839711004608278,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 11592
+    },
+    {
+      "epoch": 0.11593,
+      "grad_norm": 1.327326801192664,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 11593
+    },
+    {
+      "epoch": 0.11594,
+      "grad_norm": 1.1228264344894299,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 11594
+    },
+    {
+      "epoch": 0.11595,
+      "grad_norm": 1.3993924632845647,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 11595
+    },
+    {
+      "epoch": 0.11596,
+      "grad_norm": 0.9955718638323818,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 11596
+    },
+    {
+      "epoch": 0.11597,
+      "grad_norm": 1.2034336458201005,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 11597
+    },
+    {
+      "epoch": 0.11598,
+      "grad_norm": 1.0008043287213872,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 11598
+    },
+    {
+      "epoch": 0.11599,
+      "grad_norm": 1.386252254726889,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 11599
+    },
+    {
+      "epoch": 0.116,
+      "grad_norm": 0.8982486028150353,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 11600
+    },
+    {
+      "epoch": 0.11601,
+      "grad_norm": 1.184347234907959,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 11601
+    },
+    {
+      "epoch": 0.11602,
+      "grad_norm": 1.1247974246340537,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 11602
+    },
+    {
+      "epoch": 0.11603,
+      "grad_norm": 1.1459727868895968,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 11603
+    },
+    {
+      "epoch": 0.11604,
+      "grad_norm": 1.200299318110692,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 11604
+    },
+    {
+      "epoch": 0.11605,
+      "grad_norm": 1.1221538219177465,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 11605
+    },
+    {
+      "epoch": 0.11606,
+      "grad_norm": 1.3896592029006039,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 11606
+    },
+    {
+      "epoch": 0.11607,
+      "grad_norm": 1.167856625001811,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 11607
+    },
+    {
+      "epoch": 0.11608,
+      "grad_norm": 1.2487032951275419,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 11608
+    },
+    {
+      "epoch": 0.11609,
+      "grad_norm": 0.9357902119911607,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 11609
+    },
+    {
+      "epoch": 0.1161,
+      "grad_norm": 1.3783721164193854,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 11610
+    },
+    {
+      "epoch": 0.11611,
+      "grad_norm": 1.0580641425258053,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 11611
+    },
+    {
+      "epoch": 0.11612,
+      "grad_norm": 1.0737145006413418,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 11612
+    },
+    {
+      "epoch": 0.11613,
+      "grad_norm": 1.2046751425123334,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 11613
+    },
+    {
+      "epoch": 0.11614,
+      "grad_norm": 1.1911854189282736,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 11614
+    },
+    {
+      "epoch": 0.11615,
+      "grad_norm": 1.2731578822836134,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 11615
+    },
+    {
+      "epoch": 0.11616,
+      "grad_norm": 1.486744540343558,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 11616
+    },
+    {
+      "epoch": 0.11617,
+      "grad_norm": 0.8968819287679518,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 11617
+    },
+    {
+      "epoch": 0.11618,
+      "grad_norm": 1.0047111452204807,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 11618
+    },
+    {
+      "epoch": 0.11619,
+      "grad_norm": 1.176206234770719,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 11619
+    },
+    {
+      "epoch": 0.1162,
+      "grad_norm": 1.2253326151523196,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 11620
+    },
+    {
+      "epoch": 0.11621,
+      "grad_norm": 1.1956657043524104,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 11621
+    },
+    {
+      "epoch": 0.11622,
+      "grad_norm": 0.9435279690248589,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 11622
+    },
+    {
+      "epoch": 0.11623,
+      "grad_norm": 1.056882142632435,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 11623
+    },
+    {
+      "epoch": 0.11624,
+      "grad_norm": 1.1106539056889895,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 11624
+    },
+    {
+      "epoch": 0.11625,
+      "grad_norm": 1.0205099002386708,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 11625
+    },
+    {
+      "epoch": 0.11626,
+      "grad_norm": 1.258461909905812,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 11626
+    },
+    {
+      "epoch": 0.11627,
+      "grad_norm": 1.2874361312056184,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 11627
+    },
+    {
+      "epoch": 0.11628,
+      "grad_norm": 1.1743309619531834,
+      "learning_rate": 0.003,
+      "loss": 4.0634,
+      "step": 11628
+    },
+    {
+      "epoch": 0.11629,
+      "grad_norm": 1.2243600857600132,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 11629
+    },
+    {
+      "epoch": 0.1163,
+      "grad_norm": 1.0499602492021,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 11630
+    },
+    {
+      "epoch": 0.11631,
+      "grad_norm": 1.2573505771904938,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 11631
+    },
+    {
+      "epoch": 0.11632,
+      "grad_norm": 1.2413025869166792,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 11632
+    },
+    {
+      "epoch": 0.11633,
+      "grad_norm": 1.4540436444418998,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 11633
+    },
+    {
+      "epoch": 0.11634,
+      "grad_norm": 0.9548533573974899,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 11634
+    },
+    {
+      "epoch": 0.11635,
+      "grad_norm": 1.3071307922264086,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 11635
+    },
+    {
+      "epoch": 0.11636,
+      "grad_norm": 1.03525412132836,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 11636
+    },
+    {
+      "epoch": 0.11637,
+      "grad_norm": 1.2135127098907612,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 11637
+    },
+    {
+      "epoch": 0.11638,
+      "grad_norm": 1.3223760615299698,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 11638
+    },
+    {
+      "epoch": 0.11639,
+      "grad_norm": 0.9699089271963789,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 11639
+    },
+    {
+      "epoch": 0.1164,
+      "grad_norm": 1.2811922369560198,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 11640
+    },
+    {
+      "epoch": 0.11641,
+      "grad_norm": 1.036699916635101,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 11641
+    },
+    {
+      "epoch": 0.11642,
+      "grad_norm": 1.116072859573732,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 11642
+    },
+    {
+      "epoch": 0.11643,
+      "grad_norm": 1.1737632356245493,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 11643
+    },
+    {
+      "epoch": 0.11644,
+      "grad_norm": 1.105651145301224,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 11644
+    },
+    {
+      "epoch": 0.11645,
+      "grad_norm": 1.1206281967173415,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 11645
+    },
+    {
+      "epoch": 0.11646,
+      "grad_norm": 1.2792254145674709,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 11646
+    },
+    {
+      "epoch": 0.11647,
+      "grad_norm": 1.1472479706618874,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 11647
+    },
+    {
+      "epoch": 0.11648,
+      "grad_norm": 1.3828212140161604,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 11648
+    },
+    {
+      "epoch": 0.11649,
+      "grad_norm": 1.1798512117694304,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 11649
+    },
+    {
+      "epoch": 0.1165,
+      "grad_norm": 1.1784795673577373,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 11650
+    },
+    {
+      "epoch": 0.11651,
+      "grad_norm": 1.10280548005316,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 11651
+    },
+    {
+      "epoch": 0.11652,
+      "grad_norm": 1.2084602073133586,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 11652
+    },
+    {
+      "epoch": 0.11653,
+      "grad_norm": 1.1644747868763254,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 11653
+    },
+    {
+      "epoch": 0.11654,
+      "grad_norm": 1.1934077001382124,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 11654
+    },
+    {
+      "epoch": 0.11655,
+      "grad_norm": 0.9579468700893862,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 11655
+    },
+    {
+      "epoch": 0.11656,
+      "grad_norm": 1.2244849985241453,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 11656
+    },
+    {
+      "epoch": 0.11657,
+      "grad_norm": 1.2201292215580293,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 11657
+    },
+    {
+      "epoch": 0.11658,
+      "grad_norm": 1.008379460556807,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 11658
+    },
+    {
+      "epoch": 0.11659,
+      "grad_norm": 1.177586242231204,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 11659
+    },
+    {
+      "epoch": 0.1166,
+      "grad_norm": 1.0891812559060503,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 11660
+    },
+    {
+      "epoch": 0.11661,
+      "grad_norm": 1.3079283675775348,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 11661
+    },
+    {
+      "epoch": 0.11662,
+      "grad_norm": 1.0842584854688044,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 11662
+    },
+    {
+      "epoch": 0.11663,
+      "grad_norm": 1.0420405487472146,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 11663
+    },
+    {
+      "epoch": 0.11664,
+      "grad_norm": 1.3155796441920706,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 11664
+    },
+    {
+      "epoch": 0.11665,
+      "grad_norm": 1.152392525026187,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 11665
+    },
+    {
+      "epoch": 0.11666,
+      "grad_norm": 1.217393356467726,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 11666
+    },
+    {
+      "epoch": 0.11667,
+      "grad_norm": 1.3174299861589436,
+      "learning_rate": 0.003,
+      "loss": 4.0601,
+      "step": 11667
+    },
+    {
+      "epoch": 0.11668,
+      "grad_norm": 1.2379665248144123,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 11668
+    },
+    {
+      "epoch": 0.11669,
+      "grad_norm": 1.2462022619969693,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 11669
+    },
+    {
+      "epoch": 0.1167,
+      "grad_norm": 1.0813016807485878,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 11670
+    },
+    {
+      "epoch": 0.11671,
+      "grad_norm": 1.581906417209871,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 11671
+    },
+    {
+      "epoch": 0.11672,
+      "grad_norm": 0.8933295146366401,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 11672
+    },
+    {
+      "epoch": 0.11673,
+      "grad_norm": 1.3454666952243168,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 11673
+    },
+    {
+      "epoch": 0.11674,
+      "grad_norm": 1.254430804934053,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 11674
+    },
+    {
+      "epoch": 0.11675,
+      "grad_norm": 1.1407305796856528,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 11675
+    },
+    {
+      "epoch": 0.11676,
+      "grad_norm": 1.1240412766616164,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 11676
+    },
+    {
+      "epoch": 0.11677,
+      "grad_norm": 1.1039857268337923,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 11677
+    },
+    {
+      "epoch": 0.11678,
+      "grad_norm": 1.1382453808865138,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 11678
+    },
+    {
+      "epoch": 0.11679,
+      "grad_norm": 1.1297436535746768,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 11679
+    },
+    {
+      "epoch": 0.1168,
+      "grad_norm": 1.3532016024947384,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 11680
+    },
+    {
+      "epoch": 0.11681,
+      "grad_norm": 0.924056872724392,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 11681
+    },
+    {
+      "epoch": 0.11682,
+      "grad_norm": 1.3574144651574285,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 11682
+    },
+    {
+      "epoch": 0.11683,
+      "grad_norm": 0.9768276300170212,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 11683
+    },
+    {
+      "epoch": 0.11684,
+      "grad_norm": 1.2699611558933386,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 11684
+    },
+    {
+      "epoch": 0.11685,
+      "grad_norm": 1.2501909792103931,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 11685
+    },
+    {
+      "epoch": 0.11686,
+      "grad_norm": 1.1022171786236412,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 11686
+    },
+    {
+      "epoch": 0.11687,
+      "grad_norm": 1.3814616040574366,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 11687
+    },
+    {
+      "epoch": 0.11688,
+      "grad_norm": 0.9287871350220055,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 11688
+    },
+    {
+      "epoch": 0.11689,
+      "grad_norm": 1.2281964388187652,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 11689
+    },
+    {
+      "epoch": 0.1169,
+      "grad_norm": 1.1127225127033677,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 11690
+    },
+    {
+      "epoch": 0.11691,
+      "grad_norm": 1.2909281509347093,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 11691
+    },
+    {
+      "epoch": 0.11692,
+      "grad_norm": 1.3619954406747572,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 11692
+    },
+    {
+      "epoch": 0.11693,
+      "grad_norm": 1.1057049514943214,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 11693
+    },
+    {
+      "epoch": 0.11694,
+      "grad_norm": 1.1428819960705745,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 11694
+    },
+    {
+      "epoch": 0.11695,
+      "grad_norm": 1.1667755384935443,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 11695
+    },
+    {
+      "epoch": 0.11696,
+      "grad_norm": 1.4469227506294298,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 11696
+    },
+    {
+      "epoch": 0.11697,
+      "grad_norm": 1.005450115587486,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 11697
+    },
+    {
+      "epoch": 0.11698,
+      "grad_norm": 1.2238213012885069,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 11698
+    },
+    {
+      "epoch": 0.11699,
+      "grad_norm": 1.054385789261448,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 11699
+    },
+    {
+      "epoch": 0.117,
+      "grad_norm": 1.122631863282793,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 11700
+    },
+    {
+      "epoch": 0.11701,
+      "grad_norm": 1.0125507091879444,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 11701
+    },
+    {
+      "epoch": 0.11702,
+      "grad_norm": 1.4368068506374105,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 11702
+    },
+    {
+      "epoch": 0.11703,
+      "grad_norm": 1.1065201692933635,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 11703
+    },
+    {
+      "epoch": 0.11704,
+      "grad_norm": 1.1390282672127734,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 11704
+    },
+    {
+      "epoch": 0.11705,
+      "grad_norm": 1.2237420373219998,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 11705
+    },
+    {
+      "epoch": 0.11706,
+      "grad_norm": 1.2701077700750183,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 11706
+    },
+    {
+      "epoch": 0.11707,
+      "grad_norm": 1.0572688186092254,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 11707
+    },
+    {
+      "epoch": 0.11708,
+      "grad_norm": 1.1488670670780203,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 11708
+    },
+    {
+      "epoch": 0.11709,
+      "grad_norm": 1.157519213062066,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 11709
+    },
+    {
+      "epoch": 0.1171,
+      "grad_norm": 1.3123712860081451,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 11710
+    },
+    {
+      "epoch": 0.11711,
+      "grad_norm": 0.9842670475281874,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 11711
+    },
+    {
+      "epoch": 0.11712,
+      "grad_norm": 1.1200872070851704,
+      "learning_rate": 0.003,
+      "loss": 4.0632,
+      "step": 11712
+    },
+    {
+      "epoch": 0.11713,
+      "grad_norm": 1.1207768888268173,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 11713
+    },
+    {
+      "epoch": 0.11714,
+      "grad_norm": 1.2468830968691615,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 11714
+    },
+    {
+      "epoch": 0.11715,
+      "grad_norm": 1.1733299447516492,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 11715
+    },
+    {
+      "epoch": 0.11716,
+      "grad_norm": 1.0093206271659385,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 11716
+    },
+    {
+      "epoch": 0.11717,
+      "grad_norm": 1.1239353189243093,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 11717
+    },
+    {
+      "epoch": 0.11718,
+      "grad_norm": 1.233944823509884,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 11718
+    },
+    {
+      "epoch": 0.11719,
+      "grad_norm": 1.2985477914439674,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 11719
+    },
+    {
+      "epoch": 0.1172,
+      "grad_norm": 1.1827958279910649,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 11720
+    },
+    {
+      "epoch": 0.11721,
+      "grad_norm": 1.1446307963150169,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 11721
+    },
+    {
+      "epoch": 0.11722,
+      "grad_norm": 1.2455474825934287,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 11722
+    },
+    {
+      "epoch": 0.11723,
+      "grad_norm": 1.11475125208383,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 11723
+    },
+    {
+      "epoch": 0.11724,
+      "grad_norm": 1.1489869640108812,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 11724
+    },
+    {
+      "epoch": 0.11725,
+      "grad_norm": 1.148474380544057,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 11725
+    },
+    {
+      "epoch": 0.11726,
+      "grad_norm": 1.1836759594720816,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 11726
+    },
+    {
+      "epoch": 0.11727,
+      "grad_norm": 1.1489901497136943,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 11727
+    },
+    {
+      "epoch": 0.11728,
+      "grad_norm": 1.2213324589045766,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 11728
+    },
+    {
+      "epoch": 0.11729,
+      "grad_norm": 1.2375612775771767,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 11729
+    },
+    {
+      "epoch": 0.1173,
+      "grad_norm": 1.0576264778912117,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 11730
+    },
+    {
+      "epoch": 0.11731,
+      "grad_norm": 1.1994319511524283,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 11731
+    },
+    {
+      "epoch": 0.11732,
+      "grad_norm": 1.1470271295502747,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 11732
+    },
+    {
+      "epoch": 0.11733,
+      "grad_norm": 1.0321703975881884,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 11733
+    },
+    {
+      "epoch": 0.11734,
+      "grad_norm": 1.1708692520929904,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 11734
+    },
+    {
+      "epoch": 0.11735,
+      "grad_norm": 1.1217915693347118,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 11735
+    },
+    {
+      "epoch": 0.11736,
+      "grad_norm": 1.1484409584760604,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 11736
+    },
+    {
+      "epoch": 0.11737,
+      "grad_norm": 1.1665894926579745,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 11737
+    },
+    {
+      "epoch": 0.11738,
+      "grad_norm": 1.2813020571044718,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 11738
+    },
+    {
+      "epoch": 0.11739,
+      "grad_norm": 1.3181678380644914,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 11739
+    },
+    {
+      "epoch": 0.1174,
+      "grad_norm": 1.120161267354906,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 11740
+    },
+    {
+      "epoch": 0.11741,
+      "grad_norm": 1.4288394398505233,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 11741
+    },
+    {
+      "epoch": 0.11742,
+      "grad_norm": 1.1845167399331413,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 11742
+    },
+    {
+      "epoch": 0.11743,
+      "grad_norm": 1.2099165641421654,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 11743
+    },
+    {
+      "epoch": 0.11744,
+      "grad_norm": 1.1904859574947253,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 11744
+    },
+    {
+      "epoch": 0.11745,
+      "grad_norm": 1.2576951023850673,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 11745
+    },
+    {
+      "epoch": 0.11746,
+      "grad_norm": 1.0964969976409622,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 11746
+    },
+    {
+      "epoch": 0.11747,
+      "grad_norm": 1.4120869523116033,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 11747
+    },
+    {
+      "epoch": 0.11748,
+      "grad_norm": 1.1148049335571633,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 11748
+    },
+    {
+      "epoch": 0.11749,
+      "grad_norm": 1.1734647930140643,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 11749
+    },
+    {
+      "epoch": 0.1175,
+      "grad_norm": 1.2482405487706174,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 11750
+    },
+    {
+      "epoch": 0.11751,
+      "grad_norm": 1.1483514385526685,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 11751
+    },
+    {
+      "epoch": 0.11752,
+      "grad_norm": 1.2500272890819837,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 11752
+    },
+    {
+      "epoch": 0.11753,
+      "grad_norm": 1.058210157236084,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 11753
+    },
+    {
+      "epoch": 0.11754,
+      "grad_norm": 1.1487736631750007,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 11754
+    },
+    {
+      "epoch": 0.11755,
+      "grad_norm": 1.4715443707390885,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 11755
+    },
+    {
+      "epoch": 0.11756,
+      "grad_norm": 1.2303628287318422,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 11756
+    },
+    {
+      "epoch": 0.11757,
+      "grad_norm": 1.2922823982550529,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 11757
+    },
+    {
+      "epoch": 0.11758,
+      "grad_norm": 1.0379265137390694,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 11758
+    },
+    {
+      "epoch": 0.11759,
+      "grad_norm": 1.3406374253364324,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 11759
+    },
+    {
+      "epoch": 0.1176,
+      "grad_norm": 1.1304813742700188,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 11760
+    },
+    {
+      "epoch": 0.11761,
+      "grad_norm": 1.2685923899639884,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 11761
+    },
+    {
+      "epoch": 0.11762,
+      "grad_norm": 1.1593920836432694,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 11762
+    },
+    {
+      "epoch": 0.11763,
+      "grad_norm": 1.1762381398269397,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 11763
+    },
+    {
+      "epoch": 0.11764,
+      "grad_norm": 1.082858055046518,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 11764
+    },
+    {
+      "epoch": 0.11765,
+      "grad_norm": 1.191615642444593,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 11765
+    },
+    {
+      "epoch": 0.11766,
+      "grad_norm": 1.236828958505827,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 11766
+    },
+    {
+      "epoch": 0.11767,
+      "grad_norm": 1.293279569430646,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 11767
+    },
+    {
+      "epoch": 0.11768,
+      "grad_norm": 1.3077046844773013,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 11768
+    },
+    {
+      "epoch": 0.11769,
+      "grad_norm": 1.0549809352357689,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 11769
+    },
+    {
+      "epoch": 0.1177,
+      "grad_norm": 1.0689765908038116,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 11770
+    },
+    {
+      "epoch": 0.11771,
+      "grad_norm": 1.3866554981109134,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 11771
+    },
+    {
+      "epoch": 0.11772,
+      "grad_norm": 1.182097917949292,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 11772
+    },
+    {
+      "epoch": 0.11773,
+      "grad_norm": 1.2642222535276253,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 11773
+    },
+    {
+      "epoch": 0.11774,
+      "grad_norm": 0.9460972097814158,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 11774
+    },
+    {
+      "epoch": 0.11775,
+      "grad_norm": 1.1967480636343066,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 11775
+    },
+    {
+      "epoch": 0.11776,
+      "grad_norm": 0.9332242169274172,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 11776
+    },
+    {
+      "epoch": 0.11777,
+      "grad_norm": 1.394104158897881,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 11777
+    },
+    {
+      "epoch": 0.11778,
+      "grad_norm": 0.8763693888556454,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 11778
+    },
+    {
+      "epoch": 0.11779,
+      "grad_norm": 1.1878650279892633,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 11779
+    },
+    {
+      "epoch": 0.1178,
+      "grad_norm": 1.2171141033704371,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 11780
+    },
+    {
+      "epoch": 0.11781,
+      "grad_norm": 1.160898330979206,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 11781
+    },
+    {
+      "epoch": 0.11782,
+      "grad_norm": 1.3998726789246247,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 11782
+    },
+    {
+      "epoch": 0.11783,
+      "grad_norm": 1.274867591692867,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 11783
+    },
+    {
+      "epoch": 0.11784,
+      "grad_norm": 1.122679163756025,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 11784
+    },
+    {
+      "epoch": 0.11785,
+      "grad_norm": 1.350965890967207,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 11785
+    },
+    {
+      "epoch": 0.11786,
+      "grad_norm": 1.1259362312209875,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 11786
+    },
+    {
+      "epoch": 0.11787,
+      "grad_norm": 1.1001601351081598,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 11787
+    },
+    {
+      "epoch": 0.11788,
+      "grad_norm": 1.117549418935662,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 11788
+    },
+    {
+      "epoch": 0.11789,
+      "grad_norm": 0.9414233692391143,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 11789
+    },
+    {
+      "epoch": 0.1179,
+      "grad_norm": 1.0644099136425493,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 11790
+    },
+    {
+      "epoch": 0.11791,
+      "grad_norm": 1.2857169916877873,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 11791
+    },
+    {
+      "epoch": 0.11792,
+      "grad_norm": 0.9110294228440342,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 11792
+    },
+    {
+      "epoch": 0.11793,
+      "grad_norm": 1.1296405959354765,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 11793
+    },
+    {
+      "epoch": 0.11794,
+      "grad_norm": 1.098095724914582,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 11794
+    },
+    {
+      "epoch": 0.11795,
+      "grad_norm": 1.326195921928846,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 11795
+    },
+    {
+      "epoch": 0.11796,
+      "grad_norm": 0.9378070394703966,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 11796
+    },
+    {
+      "epoch": 0.11797,
+      "grad_norm": 1.0521633920909828,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 11797
+    },
+    {
+      "epoch": 0.11798,
+      "grad_norm": 1.1384690047707438,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 11798
+    },
+    {
+      "epoch": 0.11799,
+      "grad_norm": 1.2149860412806044,
+      "learning_rate": 0.003,
+      "loss": 4.0572,
+      "step": 11799
+    },
+    {
+      "epoch": 0.118,
+      "grad_norm": 1.189248780058123,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 11800
+    },
+    {
+      "epoch": 0.11801,
+      "grad_norm": 1.2211735751337582,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 11801
+    },
+    {
+      "epoch": 0.11802,
+      "grad_norm": 1.3801254418581266,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 11802
+    },
+    {
+      "epoch": 0.11803,
+      "grad_norm": 1.3818090508225513,
+      "learning_rate": 0.003,
+      "loss": 4.0529,
+      "step": 11803
+    },
+    {
+      "epoch": 0.11804,
+      "grad_norm": 1.1283876629557728,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 11804
+    },
+    {
+      "epoch": 0.11805,
+      "grad_norm": 1.3132843310339615,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 11805
+    },
+    {
+      "epoch": 0.11806,
+      "grad_norm": 1.0209548234461399,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 11806
+    },
+    {
+      "epoch": 0.11807,
+      "grad_norm": 1.4212793934895955,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 11807
+    },
+    {
+      "epoch": 0.11808,
+      "grad_norm": 0.9218401109406803,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 11808
+    },
+    {
+      "epoch": 0.11809,
+      "grad_norm": 1.3124286668656089,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 11809
+    },
+    {
+      "epoch": 0.1181,
+      "grad_norm": 1.0303704151552795,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 11810
+    },
+    {
+      "epoch": 0.11811,
+      "grad_norm": 1.2557024156725904,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 11811
+    },
+    {
+      "epoch": 0.11812,
+      "grad_norm": 1.092659260298407,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 11812
+    },
+    {
+      "epoch": 0.11813,
+      "grad_norm": 1.0226098263465304,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 11813
+    },
+    {
+      "epoch": 0.11814,
+      "grad_norm": 1.251101082721384,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 11814
+    },
+    {
+      "epoch": 0.11815,
+      "grad_norm": 1.1762980524741449,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 11815
+    },
+    {
+      "epoch": 0.11816,
+      "grad_norm": 1.1188651716671465,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 11816
+    },
+    {
+      "epoch": 0.11817,
+      "grad_norm": 0.914012325190764,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 11817
+    },
+    {
+      "epoch": 0.11818,
+      "grad_norm": 1.1572820201025285,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 11818
+    },
+    {
+      "epoch": 0.11819,
+      "grad_norm": 1.1097633407410619,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 11819
+    },
+    {
+      "epoch": 0.1182,
+      "grad_norm": 1.1315572070408753,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 11820
+    },
+    {
+      "epoch": 0.11821,
+      "grad_norm": 1.2010201734662294,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 11821
+    },
+    {
+      "epoch": 0.11822,
+      "grad_norm": 1.206264909595148,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 11822
+    },
+    {
+      "epoch": 0.11823,
+      "grad_norm": 1.3220064172019355,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 11823
+    },
+    {
+      "epoch": 0.11824,
+      "grad_norm": 1.2144802838958413,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 11824
+    },
+    {
+      "epoch": 0.11825,
+      "grad_norm": 1.1598148282919596,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 11825
+    },
+    {
+      "epoch": 0.11826,
+      "grad_norm": 1.1618791360531502,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 11826
+    },
+    {
+      "epoch": 0.11827,
+      "grad_norm": 1.1833304595150924,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 11827
+    },
+    {
+      "epoch": 0.11828,
+      "grad_norm": 1.1074286497081403,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 11828
+    },
+    {
+      "epoch": 0.11829,
+      "grad_norm": 1.4539648581372682,
+      "learning_rate": 0.003,
+      "loss": 4.0524,
+      "step": 11829
+    },
+    {
+      "epoch": 0.1183,
+      "grad_norm": 1.071045817248875,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 11830
+    },
+    {
+      "epoch": 0.11831,
+      "grad_norm": 1.317805477765449,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 11831
+    },
+    {
+      "epoch": 0.11832,
+      "grad_norm": 1.129955427413547,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 11832
+    },
+    {
+      "epoch": 0.11833,
+      "grad_norm": 1.1888617474099106,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 11833
+    },
+    {
+      "epoch": 0.11834,
+      "grad_norm": 1.0290127949757615,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 11834
+    },
+    {
+      "epoch": 0.11835,
+      "grad_norm": 1.346252405762597,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 11835
+    },
+    {
+      "epoch": 0.11836,
+      "grad_norm": 1.0669477339564317,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 11836
+    },
+    {
+      "epoch": 0.11837,
+      "grad_norm": 1.2528885489785642,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 11837
+    },
+    {
+      "epoch": 0.11838,
+      "grad_norm": 1.2202450943926373,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 11838
+    },
+    {
+      "epoch": 0.11839,
+      "grad_norm": 1.1291740083749862,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 11839
+    },
+    {
+      "epoch": 0.1184,
+      "grad_norm": 0.973213189261728,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 11840
+    },
+    {
+      "epoch": 0.11841,
+      "grad_norm": 1.3080093889925444,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 11841
+    },
+    {
+      "epoch": 0.11842,
+      "grad_norm": 1.0994079309142295,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 11842
+    },
+    {
+      "epoch": 0.11843,
+      "grad_norm": 1.2562568817457294,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 11843
+    },
+    {
+      "epoch": 0.11844,
+      "grad_norm": 0.9846686490918987,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 11844
+    },
+    {
+      "epoch": 0.11845,
+      "grad_norm": 1.1930430438444906,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 11845
+    },
+    {
+      "epoch": 0.11846,
+      "grad_norm": 1.2231257490273924,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 11846
+    },
+    {
+      "epoch": 0.11847,
+      "grad_norm": 1.1539088743932866,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 11847
+    },
+    {
+      "epoch": 0.11848,
+      "grad_norm": 1.0931038156263568,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 11848
+    },
+    {
+      "epoch": 0.11849,
+      "grad_norm": 0.9927389994471185,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 11849
+    },
+    {
+      "epoch": 0.1185,
+      "grad_norm": 1.0974827020855389,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 11850
+    },
+    {
+      "epoch": 0.11851,
+      "grad_norm": 1.1400193029050951,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 11851
+    },
+    {
+      "epoch": 0.11852,
+      "grad_norm": 1.1260088424765735,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 11852
+    },
+    {
+      "epoch": 0.11853,
+      "grad_norm": 1.203276342374635,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 11853
+    },
+    {
+      "epoch": 0.11854,
+      "grad_norm": 1.2713596037206674,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 11854
+    },
+    {
+      "epoch": 0.11855,
+      "grad_norm": 1.0995469030408622,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 11855
+    },
+    {
+      "epoch": 0.11856,
+      "grad_norm": 1.526076675901606,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 11856
+    },
+    {
+      "epoch": 0.11857,
+      "grad_norm": 0.9423722463191191,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 11857
+    },
+    {
+      "epoch": 0.11858,
+      "grad_norm": 1.2391248570515367,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 11858
+    },
+    {
+      "epoch": 0.11859,
+      "grad_norm": 1.1811635655252974,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 11859
+    },
+    {
+      "epoch": 0.1186,
+      "grad_norm": 1.2433780055764707,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 11860
+    },
+    {
+      "epoch": 0.11861,
+      "grad_norm": 1.1675448550905765,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 11861
+    },
+    {
+      "epoch": 0.11862,
+      "grad_norm": 1.522196016713748,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 11862
+    },
+    {
+      "epoch": 0.11863,
+      "grad_norm": 0.8786368309806317,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 11863
+    },
+    {
+      "epoch": 0.11864,
+      "grad_norm": 1.189025993197089,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 11864
+    },
+    {
+      "epoch": 0.11865,
+      "grad_norm": 1.1997885236483348,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 11865
+    },
+    {
+      "epoch": 0.11866,
+      "grad_norm": 1.1371332429372165,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 11866
+    },
+    {
+      "epoch": 0.11867,
+      "grad_norm": 0.9698227780141195,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 11867
+    },
+    {
+      "epoch": 0.11868,
+      "grad_norm": 1.103930407918838,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 11868
+    },
+    {
+      "epoch": 0.11869,
+      "grad_norm": 1.302679313668843,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 11869
+    },
+    {
+      "epoch": 0.1187,
+      "grad_norm": 1.2640687849220409,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 11870
+    },
+    {
+      "epoch": 0.11871,
+      "grad_norm": 1.0418025168606888,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 11871
+    },
+    {
+      "epoch": 0.11872,
+      "grad_norm": 1.179394195887056,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 11872
+    },
+    {
+      "epoch": 0.11873,
+      "grad_norm": 1.000011524256758,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 11873
+    },
+    {
+      "epoch": 0.11874,
+      "grad_norm": 1.1944779241728058,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 11874
+    },
+    {
+      "epoch": 0.11875,
+      "grad_norm": 1.0502451220356457,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 11875
+    },
+    {
+      "epoch": 0.11876,
+      "grad_norm": 1.296683390399838,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 11876
+    },
+    {
+      "epoch": 0.11877,
+      "grad_norm": 0.9584019380860173,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 11877
+    },
+    {
+      "epoch": 0.11878,
+      "grad_norm": 1.4464987936871119,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 11878
+    },
+    {
+      "epoch": 0.11879,
+      "grad_norm": 1.0641775759664829,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 11879
+    },
+    {
+      "epoch": 0.1188,
+      "grad_norm": 1.1780706118325417,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 11880
+    },
+    {
+      "epoch": 0.11881,
+      "grad_norm": 1.1247991069312226,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 11881
+    },
+    {
+      "epoch": 0.11882,
+      "grad_norm": 1.1967564947594456,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 11882
+    },
+    {
+      "epoch": 0.11883,
+      "grad_norm": 1.1953315126688369,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 11883
+    },
+    {
+      "epoch": 0.11884,
+      "grad_norm": 1.1616352501275355,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 11884
+    },
+    {
+      "epoch": 0.11885,
+      "grad_norm": 1.2509035326277451,
+      "learning_rate": 0.003,
+      "loss": 4.0591,
+      "step": 11885
+    },
+    {
+      "epoch": 0.11886,
+      "grad_norm": 1.0576045189685839,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 11886
+    },
+    {
+      "epoch": 0.11887,
+      "grad_norm": 1.298046893222415,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 11887
+    },
+    {
+      "epoch": 0.11888,
+      "grad_norm": 0.9499057577771591,
+      "learning_rate": 0.003,
+      "loss": 4.0517,
+      "step": 11888
+    },
+    {
+      "epoch": 0.11889,
+      "grad_norm": 1.4214495434215852,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 11889
+    },
+    {
+      "epoch": 0.1189,
+      "grad_norm": 1.303172172126755,
+      "learning_rate": 0.003,
+      "loss": 4.0524,
+      "step": 11890
+    },
+    {
+      "epoch": 0.11891,
+      "grad_norm": 1.066754223369426,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 11891
+    },
+    {
+      "epoch": 0.11892,
+      "grad_norm": 1.1813142465709445,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 11892
+    },
+    {
+      "epoch": 0.11893,
+      "grad_norm": 1.1318573053901286,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 11893
+    },
+    {
+      "epoch": 0.11894,
+      "grad_norm": 1.330891602268722,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 11894
+    },
+    {
+      "epoch": 0.11895,
+      "grad_norm": 0.9313185148188895,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 11895
+    },
+    {
+      "epoch": 0.11896,
+      "grad_norm": 1.2289925546102853,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 11896
+    },
+    {
+      "epoch": 0.11897,
+      "grad_norm": 1.3072846874199877,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 11897
+    },
+    {
+      "epoch": 0.11898,
+      "grad_norm": 1.1161842409655773,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 11898
+    },
+    {
+      "epoch": 0.11899,
+      "grad_norm": 1.2666190795801895,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 11899
+    },
+    {
+      "epoch": 0.119,
+      "grad_norm": 1.1826671258994363,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 11900
+    },
+    {
+      "epoch": 0.11901,
+      "grad_norm": 1.137766702745759,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 11901
+    },
+    {
+      "epoch": 0.11902,
+      "grad_norm": 1.3498299757086438,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 11902
+    },
+    {
+      "epoch": 0.11903,
+      "grad_norm": 1.0286391095482723,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 11903
+    },
+    {
+      "epoch": 0.11904,
+      "grad_norm": 1.291367620845929,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 11904
+    },
+    {
+      "epoch": 0.11905,
+      "grad_norm": 1.0161169842231146,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 11905
+    },
+    {
+      "epoch": 0.11906,
+      "grad_norm": 1.363924299039333,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 11906
+    },
+    {
+      "epoch": 0.11907,
+      "grad_norm": 0.9352441437117577,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 11907
+    },
+    {
+      "epoch": 0.11908,
+      "grad_norm": 1.3625840410584036,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 11908
+    },
+    {
+      "epoch": 0.11909,
+      "grad_norm": 1.277088311088804,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 11909
+    },
+    {
+      "epoch": 0.1191,
+      "grad_norm": 1.212682027964726,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 11910
+    },
+    {
+      "epoch": 0.11911,
+      "grad_norm": 1.0288245659041568,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 11911
+    },
+    {
+      "epoch": 0.11912,
+      "grad_norm": 1.2480548583938877,
+      "learning_rate": 0.003,
+      "loss": 4.0702,
+      "step": 11912
+    },
+    {
+      "epoch": 0.11913,
+      "grad_norm": 0.9880713186954828,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 11913
+    },
+    {
+      "epoch": 0.11914,
+      "grad_norm": 1.3610335200499963,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 11914
+    },
+    {
+      "epoch": 0.11915,
+      "grad_norm": 1.180493316410989,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 11915
+    },
+    {
+      "epoch": 0.11916,
+      "grad_norm": 1.2400050091012045,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 11916
+    },
+    {
+      "epoch": 0.11917,
+      "grad_norm": 1.136714338075228,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 11917
+    },
+    {
+      "epoch": 0.11918,
+      "grad_norm": 1.2965300537848186,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 11918
+    },
+    {
+      "epoch": 0.11919,
+      "grad_norm": 1.1067424063285383,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 11919
+    },
+    {
+      "epoch": 0.1192,
+      "grad_norm": 1.143087763102736,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 11920
+    },
+    {
+      "epoch": 0.11921,
+      "grad_norm": 1.1278594645317526,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 11921
+    },
+    {
+      "epoch": 0.11922,
+      "grad_norm": 1.1065993088756547,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 11922
+    },
+    {
+      "epoch": 0.11923,
+      "grad_norm": 1.0404477273555774,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 11923
+    },
+    {
+      "epoch": 0.11924,
+      "grad_norm": 1.098544390943939,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 11924
+    },
+    {
+      "epoch": 0.11925,
+      "grad_norm": 1.261762237162695,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 11925
+    },
+    {
+      "epoch": 0.11926,
+      "grad_norm": 1.3263357716743485,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 11926
+    },
+    {
+      "epoch": 0.11927,
+      "grad_norm": 1.3069181491866044,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 11927
+    },
+    {
+      "epoch": 0.11928,
+      "grad_norm": 1.2597036992613604,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 11928
+    },
+    {
+      "epoch": 0.11929,
+      "grad_norm": 1.1978030782695692,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 11929
+    },
+    {
+      "epoch": 0.1193,
+      "grad_norm": 1.122796948660304,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 11930
+    },
+    {
+      "epoch": 0.11931,
+      "grad_norm": 1.28639539805541,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 11931
+    },
+    {
+      "epoch": 0.11932,
+      "grad_norm": 0.8855138744393674,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 11932
+    },
+    {
+      "epoch": 0.11933,
+      "grad_norm": 1.237861600157439,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 11933
+    },
+    {
+      "epoch": 0.11934,
+      "grad_norm": 1.0400155549854835,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 11934
+    },
+    {
+      "epoch": 0.11935,
+      "grad_norm": 1.253240275222296,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 11935
+    },
+    {
+      "epoch": 0.11936,
+      "grad_norm": 1.1208436384532008,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 11936
+    },
+    {
+      "epoch": 0.11937,
+      "grad_norm": 1.188206798413594,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 11937
+    },
+    {
+      "epoch": 0.11938,
+      "grad_norm": 1.1880639987904473,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 11938
+    },
+    {
+      "epoch": 0.11939,
+      "grad_norm": 1.1272675744243077,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 11939
+    },
+    {
+      "epoch": 0.1194,
+      "grad_norm": 1.4203426618154515,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 11940
+    },
+    {
+      "epoch": 0.11941,
+      "grad_norm": 0.8873694901506768,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 11941
+    },
+    {
+      "epoch": 0.11942,
+      "grad_norm": 1.1932380771885094,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 11942
+    },
+    {
+      "epoch": 0.11943,
+      "grad_norm": 1.172479996445272,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 11943
+    },
+    {
+      "epoch": 0.11944,
+      "grad_norm": 1.197483536826607,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 11944
+    },
+    {
+      "epoch": 0.11945,
+      "grad_norm": 1.367144021054736,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 11945
+    },
+    {
+      "epoch": 0.11946,
+      "grad_norm": 1.1658588956904559,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 11946
+    },
+    {
+      "epoch": 0.11947,
+      "grad_norm": 1.0672867133410382,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 11947
+    },
+    {
+      "epoch": 0.11948,
+      "grad_norm": 1.217906799639158,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 11948
+    },
+    {
+      "epoch": 0.11949,
+      "grad_norm": 1.10277312621074,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 11949
+    },
+    {
+      "epoch": 0.1195,
+      "grad_norm": 1.3388512544054778,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 11950
+    },
+    {
+      "epoch": 0.11951,
+      "grad_norm": 1.1385910637331862,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 11951
+    },
+    {
+      "epoch": 0.11952,
+      "grad_norm": 1.129078560681352,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 11952
+    },
+    {
+      "epoch": 0.11953,
+      "grad_norm": 1.3014140128951632,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 11953
+    },
+    {
+      "epoch": 0.11954,
+      "grad_norm": 1.0937861343588238,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 11954
+    },
+    {
+      "epoch": 0.11955,
+      "grad_norm": 1.3146945090315645,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 11955
+    },
+    {
+      "epoch": 0.11956,
+      "grad_norm": 1.0561747096686331,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 11956
+    },
+    {
+      "epoch": 0.11957,
+      "grad_norm": 1.3427476069726214,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 11957
+    },
+    {
+      "epoch": 0.11958,
+      "grad_norm": 0.9106335166241142,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 11958
+    },
+    {
+      "epoch": 0.11959,
+      "grad_norm": 1.2516591304108617,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 11959
+    },
+    {
+      "epoch": 0.1196,
+      "grad_norm": 1.2013239303032632,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 11960
+    },
+    {
+      "epoch": 0.11961,
+      "grad_norm": 1.177656487506444,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 11961
+    },
+    {
+      "epoch": 0.11962,
+      "grad_norm": 1.2925731396377893,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 11962
+    },
+    {
+      "epoch": 0.11963,
+      "grad_norm": 1.2500654665973316,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 11963
+    },
+    {
+      "epoch": 0.11964,
+      "grad_norm": 1.0425860200509207,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 11964
+    },
+    {
+      "epoch": 0.11965,
+      "grad_norm": 1.2377261274781197,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 11965
+    },
+    {
+      "epoch": 0.11966,
+      "grad_norm": 1.2032049076648743,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 11966
+    },
+    {
+      "epoch": 0.11967,
+      "grad_norm": 1.1633394155622445,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 11967
+    },
+    {
+      "epoch": 0.11968,
+      "grad_norm": 1.319184772941729,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 11968
+    },
+    {
+      "epoch": 0.11969,
+      "grad_norm": 1.2615950720517894,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 11969
+    },
+    {
+      "epoch": 0.1197,
+      "grad_norm": 1.2523901831861972,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 11970
+    },
+    {
+      "epoch": 0.11971,
+      "grad_norm": 1.0498669279565727,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 11971
+    },
+    {
+      "epoch": 0.11972,
+      "grad_norm": 1.2175500601101223,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 11972
+    },
+    {
+      "epoch": 0.11973,
+      "grad_norm": 1.130504073169973,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 11973
+    },
+    {
+      "epoch": 0.11974,
+      "grad_norm": 1.179537557381126,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 11974
+    },
+    {
+      "epoch": 0.11975,
+      "grad_norm": 1.2289925798077932,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 11975
+    },
+    {
+      "epoch": 0.11976,
+      "grad_norm": 1.030004757594503,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 11976
+    },
+    {
+      "epoch": 0.11977,
+      "grad_norm": 1.3000185859855784,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 11977
+    },
+    {
+      "epoch": 0.11978,
+      "grad_norm": 1.0874027837487101,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 11978
+    },
+    {
+      "epoch": 0.11979,
+      "grad_norm": 1.2673027617231618,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 11979
+    },
+    {
+      "epoch": 0.1198,
+      "grad_norm": 1.1353539513587794,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 11980
+    },
+    {
+      "epoch": 0.11981,
+      "grad_norm": 1.2446619182105716,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 11981
+    },
+    {
+      "epoch": 0.11982,
+      "grad_norm": 1.28462029530281,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 11982
+    },
+    {
+      "epoch": 0.11983,
+      "grad_norm": 1.0633275265167388,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 11983
+    },
+    {
+      "epoch": 0.11984,
+      "grad_norm": 1.2422112244323555,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 11984
+    },
+    {
+      "epoch": 0.11985,
+      "grad_norm": 1.0976759058442642,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 11985
+    },
+    {
+      "epoch": 0.11986,
+      "grad_norm": 1.2409565112326744,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 11986
+    },
+    {
+      "epoch": 0.11987,
+      "grad_norm": 1.091733743800522,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 11987
+    },
+    {
+      "epoch": 0.11988,
+      "grad_norm": 1.0535364843904362,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 11988
+    },
+    {
+      "epoch": 0.11989,
+      "grad_norm": 1.2117698380224653,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 11989
+    },
+    {
+      "epoch": 0.1199,
+      "grad_norm": 1.2107541460176305,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 11990
+    },
+    {
+      "epoch": 0.11991,
+      "grad_norm": 1.128519381242994,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 11991
+    },
+    {
+      "epoch": 0.11992,
+      "grad_norm": 1.2912774558797513,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 11992
+    },
+    {
+      "epoch": 0.11993,
+      "grad_norm": 0.9859836123745601,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 11993
+    },
+    {
+      "epoch": 0.11994,
+      "grad_norm": 1.4062953714578372,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 11994
+    },
+    {
+      "epoch": 0.11995,
+      "grad_norm": 0.9854278363001263,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 11995
+    },
+    {
+      "epoch": 0.11996,
+      "grad_norm": 1.1283327652944204,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 11996
+    },
+    {
+      "epoch": 0.11997,
+      "grad_norm": 1.2578860491282189,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 11997
+    },
+    {
+      "epoch": 0.11998,
+      "grad_norm": 1.2201013514042887,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 11998
+    },
+    {
+      "epoch": 0.11999,
+      "grad_norm": 1.0464428472868936,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 11999
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 1.1442643913849708,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 12000
+    },
+    {
+      "epoch": 0.12001,
+      "grad_norm": 1.3301044375260127,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 12001
+    },
+    {
+      "epoch": 0.12002,
+      "grad_norm": 1.105906653032294,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 12002
+    },
+    {
+      "epoch": 0.12003,
+      "grad_norm": 1.2210250779728278,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 12003
+    },
+    {
+      "epoch": 0.12004,
+      "grad_norm": 1.1362570687956641,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 12004
+    },
+    {
+      "epoch": 0.12005,
+      "grad_norm": 1.2598481775607295,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 12005
+    },
+    {
+      "epoch": 0.12006,
+      "grad_norm": 1.238198018149289,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 12006
+    },
+    {
+      "epoch": 0.12007,
+      "grad_norm": 1.329447272103125,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 12007
+    },
+    {
+      "epoch": 0.12008,
+      "grad_norm": 1.1041434627768059,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 12008
+    },
+    {
+      "epoch": 0.12009,
+      "grad_norm": 1.0703280836160836,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 12009
+    },
+    {
+      "epoch": 0.1201,
+      "grad_norm": 1.1943502028864001,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 12010
+    },
+    {
+      "epoch": 0.12011,
+      "grad_norm": 1.0239676315199393,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 12011
+    },
+    {
+      "epoch": 0.12012,
+      "grad_norm": 1.4503121638562684,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 12012
+    },
+    {
+      "epoch": 0.12013,
+      "grad_norm": 1.015108621263654,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 12013
+    },
+    {
+      "epoch": 0.12014,
+      "grad_norm": 1.389962594266883,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 12014
+    },
+    {
+      "epoch": 0.12015,
+      "grad_norm": 1.1252082891848212,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 12015
+    },
+    {
+      "epoch": 0.12016,
+      "grad_norm": 1.0487983474875753,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 12016
+    },
+    {
+      "epoch": 0.12017,
+      "grad_norm": 1.4431048621531977,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 12017
+    },
+    {
+      "epoch": 0.12018,
+      "grad_norm": 0.9166310978141474,
+      "learning_rate": 0.003,
+      "loss": 3.9751,
+      "step": 12018
+    },
+    {
+      "epoch": 0.12019,
+      "grad_norm": 1.2506433606927407,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 12019
+    },
+    {
+      "epoch": 0.1202,
+      "grad_norm": 1.480480476667452,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 12020
+    },
+    {
+      "epoch": 0.12021,
+      "grad_norm": 1.0689179540973426,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 12021
+    },
+    {
+      "epoch": 0.12022,
+      "grad_norm": 1.238638964578657,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 12022
+    },
+    {
+      "epoch": 0.12023,
+      "grad_norm": 1.217878541780031,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 12023
+    },
+    {
+      "epoch": 0.12024,
+      "grad_norm": 1.0851112909646494,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 12024
+    },
+    {
+      "epoch": 0.12025,
+      "grad_norm": 1.1604224142434416,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 12025
+    },
+    {
+      "epoch": 0.12026,
+      "grad_norm": 1.031844987403058,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 12026
+    },
+    {
+      "epoch": 0.12027,
+      "grad_norm": 1.1595745336365029,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 12027
+    },
+    {
+      "epoch": 0.12028,
+      "grad_norm": 1.1947817751700118,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 12028
+    },
+    {
+      "epoch": 0.12029,
+      "grad_norm": 1.0497319637213312,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 12029
+    },
+    {
+      "epoch": 0.1203,
+      "grad_norm": 1.0887730490228416,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 12030
+    },
+    {
+      "epoch": 0.12031,
+      "grad_norm": 1.2639844964964313,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 12031
+    },
+    {
+      "epoch": 0.12032,
+      "grad_norm": 1.1476592744733245,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 12032
+    },
+    {
+      "epoch": 0.12033,
+      "grad_norm": 1.4253662214975455,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 12033
+    },
+    {
+      "epoch": 0.12034,
+      "grad_norm": 1.0367483017417718,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 12034
+    },
+    {
+      "epoch": 0.12035,
+      "grad_norm": 1.3672538599328856,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 12035
+    },
+    {
+      "epoch": 0.12036,
+      "grad_norm": 1.088814881263226,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 12036
+    },
+    {
+      "epoch": 0.12037,
+      "grad_norm": 1.2526346282054286,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 12037
+    },
+    {
+      "epoch": 0.12038,
+      "grad_norm": 1.160570553266028,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 12038
+    },
+    {
+      "epoch": 0.12039,
+      "grad_norm": 1.1199414106378185,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 12039
+    },
+    {
+      "epoch": 0.1204,
+      "grad_norm": 1.1004831372504815,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 12040
+    },
+    {
+      "epoch": 0.12041,
+      "grad_norm": 1.168791590589254,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 12041
+    },
+    {
+      "epoch": 0.12042,
+      "grad_norm": 1.1345050629943982,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 12042
+    },
+    {
+      "epoch": 0.12043,
+      "grad_norm": 1.1312277297552067,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 12043
+    },
+    {
+      "epoch": 0.12044,
+      "grad_norm": 1.1218607973489465,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 12044
+    },
+    {
+      "epoch": 0.12045,
+      "grad_norm": 1.3439830827965968,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 12045
+    },
+    {
+      "epoch": 0.12046,
+      "grad_norm": 1.194576377831826,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 12046
+    },
+    {
+      "epoch": 0.12047,
+      "grad_norm": 1.2242887448551507,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 12047
+    },
+    {
+      "epoch": 0.12048,
+      "grad_norm": 1.1869139877584274,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 12048
+    },
+    {
+      "epoch": 0.12049,
+      "grad_norm": 1.23366882798688,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 12049
+    },
+    {
+      "epoch": 0.1205,
+      "grad_norm": 1.1321804170340066,
+      "learning_rate": 0.003,
+      "loss": 4.0509,
+      "step": 12050
+    },
+    {
+      "epoch": 0.12051,
+      "grad_norm": 1.2506912484400732,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 12051
+    },
+    {
+      "epoch": 0.12052,
+      "grad_norm": 1.171321048284884,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 12052
+    },
+    {
+      "epoch": 0.12053,
+      "grad_norm": 1.5483651625241348,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 12053
+    },
+    {
+      "epoch": 0.12054,
+      "grad_norm": 0.9450023337340406,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 12054
+    },
+    {
+      "epoch": 0.12055,
+      "grad_norm": 1.2518788502247158,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 12055
+    },
+    {
+      "epoch": 0.12056,
+      "grad_norm": 1.2025596603154725,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 12056
+    },
+    {
+      "epoch": 0.12057,
+      "grad_norm": 1.2521406673057627,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 12057
+    },
+    {
+      "epoch": 0.12058,
+      "grad_norm": 1.0526770017752556,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 12058
+    },
+    {
+      "epoch": 0.12059,
+      "grad_norm": 1.0626535398232526,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 12059
+    },
+    {
+      "epoch": 0.1206,
+      "grad_norm": 1.3336122110831834,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 12060
+    },
+    {
+      "epoch": 0.12061,
+      "grad_norm": 1.118599017531215,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 12061
+    },
+    {
+      "epoch": 0.12062,
+      "grad_norm": 1.363473374169237,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 12062
+    },
+    {
+      "epoch": 0.12063,
+      "grad_norm": 0.8786979557351099,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 12063
+    },
+    {
+      "epoch": 0.12064,
+      "grad_norm": 1.1947466023568365,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 12064
+    },
+    {
+      "epoch": 0.12065,
+      "grad_norm": 1.215993473222538,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 12065
+    },
+    {
+      "epoch": 0.12066,
+      "grad_norm": 1.2447752310217368,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 12066
+    },
+    {
+      "epoch": 0.12067,
+      "grad_norm": 1.0837006580418418,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 12067
+    },
+    {
+      "epoch": 0.12068,
+      "grad_norm": 1.3971605398432154,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 12068
+    },
+    {
+      "epoch": 0.12069,
+      "grad_norm": 1.007380264646283,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 12069
+    },
+    {
+      "epoch": 0.1207,
+      "grad_norm": 1.3122686823444492,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 12070
+    },
+    {
+      "epoch": 0.12071,
+      "grad_norm": 1.0684153514164216,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 12071
+    },
+    {
+      "epoch": 0.12072,
+      "grad_norm": 1.2604810107038449,
+      "learning_rate": 0.003,
+      "loss": 4.0679,
+      "step": 12072
+    },
+    {
+      "epoch": 0.12073,
+      "grad_norm": 1.155441969210191,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 12073
+    },
+    {
+      "epoch": 0.12074,
+      "grad_norm": 1.5303984928048056,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 12074
+    },
+    {
+      "epoch": 0.12075,
+      "grad_norm": 0.8280353697240316,
+      "learning_rate": 0.003,
+      "loss": 3.9775,
+      "step": 12075
+    },
+    {
+      "epoch": 0.12076,
+      "grad_norm": 0.9625486618108077,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 12076
+    },
+    {
+      "epoch": 0.12077,
+      "grad_norm": 1.2545752671715014,
+      "learning_rate": 0.003,
+      "loss": 3.9819,
+      "step": 12077
+    },
+    {
+      "epoch": 0.12078,
+      "grad_norm": 1.1154569818666091,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 12078
+    },
+    {
+      "epoch": 0.12079,
+      "grad_norm": 1.141600191269537,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 12079
+    },
+    {
+      "epoch": 0.1208,
+      "grad_norm": 1.238179881437241,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 12080
+    },
+    {
+      "epoch": 0.12081,
+      "grad_norm": 1.165698596146827,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 12081
+    },
+    {
+      "epoch": 0.12082,
+      "grad_norm": 0.8889092422891364,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 12082
+    },
+    {
+      "epoch": 0.12083,
+      "grad_norm": 1.2543161652007244,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 12083
+    },
+    {
+      "epoch": 0.12084,
+      "grad_norm": 1.5327269316533685,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 12084
+    },
+    {
+      "epoch": 0.12085,
+      "grad_norm": 1.0365382149666054,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 12085
+    },
+    {
+      "epoch": 0.12086,
+      "grad_norm": 1.316513743782211,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 12086
+    },
+    {
+      "epoch": 0.12087,
+      "grad_norm": 1.1582107753316935,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 12087
+    },
+    {
+      "epoch": 0.12088,
+      "grad_norm": 1.2026972854083593,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 12088
+    },
+    {
+      "epoch": 0.12089,
+      "grad_norm": 1.1865818257324852,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 12089
+    },
+    {
+      "epoch": 0.1209,
+      "grad_norm": 1.213904883132537,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 12090
+    },
+    {
+      "epoch": 0.12091,
+      "grad_norm": 1.0648805961872354,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 12091
+    },
+    {
+      "epoch": 0.12092,
+      "grad_norm": 1.1813755883049362,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 12092
+    },
+    {
+      "epoch": 0.12093,
+      "grad_norm": 1.2292752165558312,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 12093
+    },
+    {
+      "epoch": 0.12094,
+      "grad_norm": 1.259437705088851,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 12094
+    },
+    {
+      "epoch": 0.12095,
+      "grad_norm": 1.1137767158004688,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 12095
+    },
+    {
+      "epoch": 0.12096,
+      "grad_norm": 1.3600842565366165,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 12096
+    },
+    {
+      "epoch": 0.12097,
+      "grad_norm": 0.8881258371739744,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 12097
+    },
+    {
+      "epoch": 0.12098,
+      "grad_norm": 1.2265846606485018,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 12098
+    },
+    {
+      "epoch": 0.12099,
+      "grad_norm": 1.2320301694381899,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 12099
+    },
+    {
+      "epoch": 0.121,
+      "grad_norm": 1.1555002782743249,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 12100
+    },
+    {
+      "epoch": 0.12101,
+      "grad_norm": 1.26009712635675,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 12101
+    },
+    {
+      "epoch": 0.12102,
+      "grad_norm": 1.2765674036907484,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 12102
+    },
+    {
+      "epoch": 0.12103,
+      "grad_norm": 1.2664976256713367,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 12103
+    },
+    {
+      "epoch": 0.12104,
+      "grad_norm": 0.9432051107004329,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 12104
+    },
+    {
+      "epoch": 0.12105,
+      "grad_norm": 1.326445164229193,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 12105
+    },
+    {
+      "epoch": 0.12106,
+      "grad_norm": 1.0473709580825368,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 12106
+    },
+    {
+      "epoch": 0.12107,
+      "grad_norm": 1.5389171702810274,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 12107
+    },
+    {
+      "epoch": 0.12108,
+      "grad_norm": 0.961160561659805,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 12108
+    },
+    {
+      "epoch": 0.12109,
+      "grad_norm": 1.1860213316322281,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 12109
+    },
+    {
+      "epoch": 0.1211,
+      "grad_norm": 1.276050655690645,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 12110
+    },
+    {
+      "epoch": 0.12111,
+      "grad_norm": 1.0106495220145681,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 12111
+    },
+    {
+      "epoch": 0.12112,
+      "grad_norm": 1.1404629571385734,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 12112
+    },
+    {
+      "epoch": 0.12113,
+      "grad_norm": 1.3379694333364878,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 12113
+    },
+    {
+      "epoch": 0.12114,
+      "grad_norm": 1.1057144085632993,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 12114
+    },
+    {
+      "epoch": 0.12115,
+      "grad_norm": 1.2389514049197021,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 12115
+    },
+    {
+      "epoch": 0.12116,
+      "grad_norm": 1.081032310728611,
+      "learning_rate": 0.003,
+      "loss": 4.0566,
+      "step": 12116
+    },
+    {
+      "epoch": 0.12117,
+      "grad_norm": 1.7808747808197203,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 12117
+    },
+    {
+      "epoch": 0.12118,
+      "grad_norm": 0.9804521331485236,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 12118
+    },
+    {
+      "epoch": 0.12119,
+      "grad_norm": 1.2622545631694635,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 12119
+    },
+    {
+      "epoch": 0.1212,
+      "grad_norm": 1.025608924940026,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 12120
+    },
+    {
+      "epoch": 0.12121,
+      "grad_norm": 1.2687986530932442,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 12121
+    },
+    {
+      "epoch": 0.12122,
+      "grad_norm": 1.2569166469856572,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 12122
+    },
+    {
+      "epoch": 0.12123,
+      "grad_norm": 1.0888418797208075,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 12123
+    },
+    {
+      "epoch": 0.12124,
+      "grad_norm": 1.4471321534196813,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 12124
+    },
+    {
+      "epoch": 0.12125,
+      "grad_norm": 1.0570726579438743,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 12125
+    },
+    {
+      "epoch": 0.12126,
+      "grad_norm": 1.1452310973638715,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 12126
+    },
+    {
+      "epoch": 0.12127,
+      "grad_norm": 1.0951003459920567,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 12127
+    },
+    {
+      "epoch": 0.12128,
+      "grad_norm": 1.0924282991917444,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 12128
+    },
+    {
+      "epoch": 0.12129,
+      "grad_norm": 1.077336233158983,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 12129
+    },
+    {
+      "epoch": 0.1213,
+      "grad_norm": 1.0698116679642007,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 12130
+    },
+    {
+      "epoch": 0.12131,
+      "grad_norm": 1.0569838113736956,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 12131
+    },
+    {
+      "epoch": 0.12132,
+      "grad_norm": 1.2236310443056324,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 12132
+    },
+    {
+      "epoch": 0.12133,
+      "grad_norm": 1.210938464105007,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 12133
+    },
+    {
+      "epoch": 0.12134,
+      "grad_norm": 1.1431794281160383,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 12134
+    },
+    {
+      "epoch": 0.12135,
+      "grad_norm": 1.2120388488293794,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 12135
+    },
+    {
+      "epoch": 0.12136,
+      "grad_norm": 1.163220939636625,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 12136
+    },
+    {
+      "epoch": 0.12137,
+      "grad_norm": 1.408415883667614,
+      "learning_rate": 0.003,
+      "loss": 4.0654,
+      "step": 12137
+    },
+    {
+      "epoch": 0.12138,
+      "grad_norm": 1.019972444797985,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 12138
+    },
+    {
+      "epoch": 0.12139,
+      "grad_norm": 1.4794420127960417,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 12139
+    },
+    {
+      "epoch": 0.1214,
+      "grad_norm": 0.9942984332809602,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 12140
+    },
+    {
+      "epoch": 0.12141,
+      "grad_norm": 1.198313384318162,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 12141
+    },
+    {
+      "epoch": 0.12142,
+      "grad_norm": 1.1235175502034191,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 12142
+    },
+    {
+      "epoch": 0.12143,
+      "grad_norm": 1.2021305789112728,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 12143
+    },
+    {
+      "epoch": 0.12144,
+      "grad_norm": 1.2036254675483424,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 12144
+    },
+    {
+      "epoch": 0.12145,
+      "grad_norm": 1.0563897356132683,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 12145
+    },
+    {
+      "epoch": 0.12146,
+      "grad_norm": 1.3892405792341498,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 12146
+    },
+    {
+      "epoch": 0.12147,
+      "grad_norm": 1.1584776401297876,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 12147
+    },
+    {
+      "epoch": 0.12148,
+      "grad_norm": 1.4771991665797888,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 12148
+    },
+    {
+      "epoch": 0.12149,
+      "grad_norm": 0.9036675444370385,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 12149
+    },
+    {
+      "epoch": 0.1215,
+      "grad_norm": 1.0455355580610362,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 12150
+    },
+    {
+      "epoch": 0.12151,
+      "grad_norm": 1.2718005495324547,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 12151
+    },
+    {
+      "epoch": 0.12152,
+      "grad_norm": 1.1342945095583343,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 12152
+    },
+    {
+      "epoch": 0.12153,
+      "grad_norm": 1.2466145803684447,
+      "learning_rate": 0.003,
+      "loss": 4.0605,
+      "step": 12153
+    },
+    {
+      "epoch": 0.12154,
+      "grad_norm": 1.0819537046373542,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 12154
+    },
+    {
+      "epoch": 0.12155,
+      "grad_norm": 1.1602226084211311,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 12155
+    },
+    {
+      "epoch": 0.12156,
+      "grad_norm": 1.4009042185837033,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 12156
+    },
+    {
+      "epoch": 0.12157,
+      "grad_norm": 1.1384726322964522,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 12157
+    },
+    {
+      "epoch": 0.12158,
+      "grad_norm": 1.1226291807229503,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 12158
+    },
+    {
+      "epoch": 0.12159,
+      "grad_norm": 1.188819542203544,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 12159
+    },
+    {
+      "epoch": 0.1216,
+      "grad_norm": 1.186378564541427,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 12160
+    },
+    {
+      "epoch": 0.12161,
+      "grad_norm": 1.1430987105677886,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 12161
+    },
+    {
+      "epoch": 0.12162,
+      "grad_norm": 1.0984081471542013,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 12162
+    },
+    {
+      "epoch": 0.12163,
+      "grad_norm": 1.2375098267751024,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 12163
+    },
+    {
+      "epoch": 0.12164,
+      "grad_norm": 1.3998298901471347,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 12164
+    },
+    {
+      "epoch": 0.12165,
+      "grad_norm": 0.9344084284754219,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 12165
+    },
+    {
+      "epoch": 0.12166,
+      "grad_norm": 1.512859671545336,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 12166
+    },
+    {
+      "epoch": 0.12167,
+      "grad_norm": 1.1164706886354052,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 12167
+    },
+    {
+      "epoch": 0.12168,
+      "grad_norm": 1.0269895024920574,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 12168
+    },
+    {
+      "epoch": 0.12169,
+      "grad_norm": 1.2437063976530573,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 12169
+    },
+    {
+      "epoch": 0.1217,
+      "grad_norm": 1.1616197418244247,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 12170
+    },
+    {
+      "epoch": 0.12171,
+      "grad_norm": 1.126266794256949,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 12171
+    },
+    {
+      "epoch": 0.12172,
+      "grad_norm": 1.365627633085238,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 12172
+    },
+    {
+      "epoch": 0.12173,
+      "grad_norm": 1.09988351709105,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 12173
+    },
+    {
+      "epoch": 0.12174,
+      "grad_norm": 1.199748433735864,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 12174
+    },
+    {
+      "epoch": 0.12175,
+      "grad_norm": 1.088769417635092,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 12175
+    },
+    {
+      "epoch": 0.12176,
+      "grad_norm": 1.2661905434748413,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 12176
+    },
+    {
+      "epoch": 0.12177,
+      "grad_norm": 1.0836807067546872,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 12177
+    },
+    {
+      "epoch": 0.12178,
+      "grad_norm": 1.2934576723672395,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 12178
+    },
+    {
+      "epoch": 0.12179,
+      "grad_norm": 1.1831772974220784,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 12179
+    },
+    {
+      "epoch": 0.1218,
+      "grad_norm": 1.1747496433726747,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 12180
+    },
+    {
+      "epoch": 0.12181,
+      "grad_norm": 1.3179604690170197,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 12181
+    },
+    {
+      "epoch": 0.12182,
+      "grad_norm": 1.135089293334139,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 12182
+    },
+    {
+      "epoch": 0.12183,
+      "grad_norm": 1.3221372241316014,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 12183
+    },
+    {
+      "epoch": 0.12184,
+      "grad_norm": 1.0124457856344928,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 12184
+    },
+    {
+      "epoch": 0.12185,
+      "grad_norm": 1.3589982758664614,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 12185
+    },
+    {
+      "epoch": 0.12186,
+      "grad_norm": 1.0233570129876237,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 12186
+    },
+    {
+      "epoch": 0.12187,
+      "grad_norm": 1.3079895671489778,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 12187
+    },
+    {
+      "epoch": 0.12188,
+      "grad_norm": 1.1489865721931825,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 12188
+    },
+    {
+      "epoch": 0.12189,
+      "grad_norm": 1.2412768268410446,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 12189
+    },
+    {
+      "epoch": 0.1219,
+      "grad_norm": 1.114513378698603,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 12190
+    },
+    {
+      "epoch": 0.12191,
+      "grad_norm": 1.3362592660486117,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 12191
+    },
+    {
+      "epoch": 0.12192,
+      "grad_norm": 1.0692297451970725,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 12192
+    },
+    {
+      "epoch": 0.12193,
+      "grad_norm": 1.2465778007234973,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 12193
+    },
+    {
+      "epoch": 0.12194,
+      "grad_norm": 1.1092003193835691,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 12194
+    },
+    {
+      "epoch": 0.12195,
+      "grad_norm": 1.1696708395236086,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 12195
+    },
+    {
+      "epoch": 0.12196,
+      "grad_norm": 1.1112321590252383,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 12196
+    },
+    {
+      "epoch": 0.12197,
+      "grad_norm": 1.2841076485330796,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 12197
+    },
+    {
+      "epoch": 0.12198,
+      "grad_norm": 1.107702913323641,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 12198
+    },
+    {
+      "epoch": 0.12199,
+      "grad_norm": 1.1836188082240122,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 12199
+    },
+    {
+      "epoch": 0.122,
+      "grad_norm": 1.0702302603416496,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 12200
+    },
+    {
+      "epoch": 0.12201,
+      "grad_norm": 1.357237574537661,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 12201
+    },
+    {
+      "epoch": 0.12202,
+      "grad_norm": 1.107067299040779,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 12202
+    },
+    {
+      "epoch": 0.12203,
+      "grad_norm": 1.2133228664945446,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 12203
+    },
+    {
+      "epoch": 0.12204,
+      "grad_norm": 1.122185688848671,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 12204
+    },
+    {
+      "epoch": 0.12205,
+      "grad_norm": 1.2378973407718126,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 12205
+    },
+    {
+      "epoch": 0.12206,
+      "grad_norm": 1.0146025080029806,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 12206
+    },
+    {
+      "epoch": 0.12207,
+      "grad_norm": 1.2371951440426439,
+      "learning_rate": 0.003,
+      "loss": 4.0672,
+      "step": 12207
+    },
+    {
+      "epoch": 0.12208,
+      "grad_norm": 1.1941790413272386,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 12208
+    },
+    {
+      "epoch": 0.12209,
+      "grad_norm": 1.237443502265726,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 12209
+    },
+    {
+      "epoch": 0.1221,
+      "grad_norm": 1.1191757624817986,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 12210
+    },
+    {
+      "epoch": 0.12211,
+      "grad_norm": 1.6015066005243488,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 12211
+    },
+    {
+      "epoch": 0.12212,
+      "grad_norm": 1.0912122851858126,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 12212
+    },
+    {
+      "epoch": 0.12213,
+      "grad_norm": 1.22523715982098,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 12213
+    },
+    {
+      "epoch": 0.12214,
+      "grad_norm": 1.025716267326302,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 12214
+    },
+    {
+      "epoch": 0.12215,
+      "grad_norm": 1.1976040863355408,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 12215
+    },
+    {
+      "epoch": 0.12216,
+      "grad_norm": 0.965569302971212,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 12216
+    },
+    {
+      "epoch": 0.12217,
+      "grad_norm": 1.1391293564356433,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 12217
+    },
+    {
+      "epoch": 0.12218,
+      "grad_norm": 1.0031823383720437,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 12218
+    },
+    {
+      "epoch": 0.12219,
+      "grad_norm": 1.174272945089118,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 12219
+    },
+    {
+      "epoch": 0.1222,
+      "grad_norm": 1.1436677312129953,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 12220
+    },
+    {
+      "epoch": 0.12221,
+      "grad_norm": 1.2898099150708564,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 12221
+    },
+    {
+      "epoch": 0.12222,
+      "grad_norm": 1.09890346663295,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 12222
+    },
+    {
+      "epoch": 0.12223,
+      "grad_norm": 1.3194243732788846,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 12223
+    },
+    {
+      "epoch": 0.12224,
+      "grad_norm": 1.1829019105273342,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 12224
+    },
+    {
+      "epoch": 0.12225,
+      "grad_norm": 0.9974911798449131,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 12225
+    },
+    {
+      "epoch": 0.12226,
+      "grad_norm": 1.2045148338615028,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 12226
+    },
+    {
+      "epoch": 0.12227,
+      "grad_norm": 1.2757758918468345,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 12227
+    },
+    {
+      "epoch": 0.12228,
+      "grad_norm": 1.3030462989555127,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 12228
+    },
+    {
+      "epoch": 0.12229,
+      "grad_norm": 1.1447965290278583,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 12229
+    },
+    {
+      "epoch": 0.1223,
+      "grad_norm": 1.1830532774411873,
+      "learning_rate": 0.003,
+      "loss": 4.0573,
+      "step": 12230
+    },
+    {
+      "epoch": 0.12231,
+      "grad_norm": 0.9927189165650173,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 12231
+    },
+    {
+      "epoch": 0.12232,
+      "grad_norm": 1.1343637660969617,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 12232
+    },
+    {
+      "epoch": 0.12233,
+      "grad_norm": 1.2339551740369135,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 12233
+    },
+    {
+      "epoch": 0.12234,
+      "grad_norm": 1.2153175541284331,
+      "learning_rate": 0.003,
+      "loss": 4.0579,
+      "step": 12234
+    },
+    {
+      "epoch": 0.12235,
+      "grad_norm": 1.2112370667362282,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 12235
+    },
+    {
+      "epoch": 0.12236,
+      "grad_norm": 1.2680813133769673,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 12236
+    },
+    {
+      "epoch": 0.12237,
+      "grad_norm": 1.2440893879161208,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 12237
+    },
+    {
+      "epoch": 0.12238,
+      "grad_norm": 1.0938588058052665,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 12238
+    },
+    {
+      "epoch": 0.12239,
+      "grad_norm": 1.3079644744847825,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 12239
+    },
+    {
+      "epoch": 0.1224,
+      "grad_norm": 1.2032893752750062,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 12240
+    },
+    {
+      "epoch": 0.12241,
+      "grad_norm": 1.5263423425857818,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 12241
+    },
+    {
+      "epoch": 0.12242,
+      "grad_norm": 1.1123122789832365,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 12242
+    },
+    {
+      "epoch": 0.12243,
+      "grad_norm": 1.0556139683461747,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 12243
+    },
+    {
+      "epoch": 0.12244,
+      "grad_norm": 1.3289277594180184,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 12244
+    },
+    {
+      "epoch": 0.12245,
+      "grad_norm": 1.0073269421814348,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 12245
+    },
+    {
+      "epoch": 0.12246,
+      "grad_norm": 1.4046094869371852,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 12246
+    },
+    {
+      "epoch": 0.12247,
+      "grad_norm": 1.051725583525556,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 12247
+    },
+    {
+      "epoch": 0.12248,
+      "grad_norm": 1.2368654496027767,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 12248
+    },
+    {
+      "epoch": 0.12249,
+      "grad_norm": 1.1041322243002467,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 12249
+    },
+    {
+      "epoch": 0.1225,
+      "grad_norm": 1.2524463553074452,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 12250
+    },
+    {
+      "epoch": 0.12251,
+      "grad_norm": 1.1622632606319239,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 12251
+    },
+    {
+      "epoch": 0.12252,
+      "grad_norm": 1.3179849114848947,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 12252
+    },
+    {
+      "epoch": 0.12253,
+      "grad_norm": 1.0090202086355522,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 12253
+    },
+    {
+      "epoch": 0.12254,
+      "grad_norm": 1.3230018748827679,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 12254
+    },
+    {
+      "epoch": 0.12255,
+      "grad_norm": 1.040049687066231,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 12255
+    },
+    {
+      "epoch": 0.12256,
+      "grad_norm": 1.390916117154564,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 12256
+    },
+    {
+      "epoch": 0.12257,
+      "grad_norm": 1.1859129806624236,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 12257
+    },
+    {
+      "epoch": 0.12258,
+      "grad_norm": 1.0671361782302644,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 12258
+    },
+    {
+      "epoch": 0.12259,
+      "grad_norm": 1.2161759605513203,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 12259
+    },
+    {
+      "epoch": 0.1226,
+      "grad_norm": 1.2804176653736112,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 12260
+    },
+    {
+      "epoch": 0.12261,
+      "grad_norm": 0.983275306429123,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 12261
+    },
+    {
+      "epoch": 0.12262,
+      "grad_norm": 1.2709090680095843,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 12262
+    },
+    {
+      "epoch": 0.12263,
+      "grad_norm": 1.1752535790764227,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 12263
+    },
+    {
+      "epoch": 0.12264,
+      "grad_norm": 1.276868126228694,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 12264
+    },
+    {
+      "epoch": 0.12265,
+      "grad_norm": 1.1411677072819246,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 12265
+    },
+    {
+      "epoch": 0.12266,
+      "grad_norm": 1.0902306136530755,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 12266
+    },
+    {
+      "epoch": 0.12267,
+      "grad_norm": 1.3205667988804515,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 12267
+    },
+    {
+      "epoch": 0.12268,
+      "grad_norm": 1.0465548404396692,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 12268
+    },
+    {
+      "epoch": 0.12269,
+      "grad_norm": 1.525340885064841,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 12269
+    },
+    {
+      "epoch": 0.1227,
+      "grad_norm": 0.898532904288709,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 12270
+    },
+    {
+      "epoch": 0.12271,
+      "grad_norm": 1.1625001192804503,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 12271
+    },
+    {
+      "epoch": 0.12272,
+      "grad_norm": 1.2506428969604266,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 12272
+    },
+    {
+      "epoch": 0.12273,
+      "grad_norm": 1.1400665488880517,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 12273
+    },
+    {
+      "epoch": 0.12274,
+      "grad_norm": 1.4807402303846313,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 12274
+    },
+    {
+      "epoch": 0.12275,
+      "grad_norm": 0.9718032014863593,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 12275
+    },
+    {
+      "epoch": 0.12276,
+      "grad_norm": 1.1004371990631832,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 12276
+    },
+    {
+      "epoch": 0.12277,
+      "grad_norm": 1.2316751692589583,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 12277
+    },
+    {
+      "epoch": 0.12278,
+      "grad_norm": 1.2692996495372502,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 12278
+    },
+    {
+      "epoch": 0.12279,
+      "grad_norm": 1.1298346790675529,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 12279
+    },
+    {
+      "epoch": 0.1228,
+      "grad_norm": 1.0241566953582193,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 12280
+    },
+    {
+      "epoch": 0.12281,
+      "grad_norm": 1.2259128020142214,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 12281
+    },
+    {
+      "epoch": 0.12282,
+      "grad_norm": 1.1727117627822834,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 12282
+    },
+    {
+      "epoch": 0.12283,
+      "grad_norm": 1.4802551869138976,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 12283
+    },
+    {
+      "epoch": 0.12284,
+      "grad_norm": 0.8562657072581173,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 12284
+    },
+    {
+      "epoch": 0.12285,
+      "grad_norm": 1.143034764039563,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 12285
+    },
+    {
+      "epoch": 0.12286,
+      "grad_norm": 1.2625712453206883,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 12286
+    },
+    {
+      "epoch": 0.12287,
+      "grad_norm": 1.0830382002896604,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 12287
+    },
+    {
+      "epoch": 0.12288,
+      "grad_norm": 1.1182157999246587,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 12288
+    },
+    {
+      "epoch": 0.12289,
+      "grad_norm": 0.9493864192646565,
+      "learning_rate": 0.003,
+      "loss": 3.9938,
+      "step": 12289
+    },
+    {
+      "epoch": 0.1229,
+      "grad_norm": 1.1563869494269485,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 12290
+    },
+    {
+      "epoch": 0.12291,
+      "grad_norm": 1.2541160596246155,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 12291
+    },
+    {
+      "epoch": 0.12292,
+      "grad_norm": 1.2248213033814566,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 12292
+    },
+    {
+      "epoch": 0.12293,
+      "grad_norm": 1.2000885923117754,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 12293
+    },
+    {
+      "epoch": 0.12294,
+      "grad_norm": 1.0981859496339343,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 12294
+    },
+    {
+      "epoch": 0.12295,
+      "grad_norm": 1.1575443564037111,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 12295
+    },
+    {
+      "epoch": 0.12296,
+      "grad_norm": 1.301359422856533,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 12296
+    },
+    {
+      "epoch": 0.12297,
+      "grad_norm": 1.2514385607651537,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 12297
+    },
+    {
+      "epoch": 0.12298,
+      "grad_norm": 1.273736329357012,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 12298
+    },
+    {
+      "epoch": 0.12299,
+      "grad_norm": 1.3886346632204654,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 12299
+    },
+    {
+      "epoch": 0.123,
+      "grad_norm": 1.0872428215026426,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 12300
+    },
+    {
+      "epoch": 0.12301,
+      "grad_norm": 1.370513264865828,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 12301
+    },
+    {
+      "epoch": 0.12302,
+      "grad_norm": 0.8505983228963824,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 12302
+    },
+    {
+      "epoch": 0.12303,
+      "grad_norm": 0.9457348435365074,
+      "learning_rate": 0.003,
+      "loss": 4.0838,
+      "step": 12303
+    },
+    {
+      "epoch": 0.12304,
+      "grad_norm": 0.9951681172464344,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 12304
+    },
+    {
+      "epoch": 0.12305,
+      "grad_norm": 1.0681593913605725,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 12305
+    },
+    {
+      "epoch": 0.12306,
+      "grad_norm": 1.2916709766050543,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 12306
+    },
+    {
+      "epoch": 0.12307,
+      "grad_norm": 1.2292127687463348,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 12307
+    },
+    {
+      "epoch": 0.12308,
+      "grad_norm": 1.139218468833114,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 12308
+    },
+    {
+      "epoch": 0.12309,
+      "grad_norm": 1.2485231059799966,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 12309
+    },
+    {
+      "epoch": 0.1231,
+      "grad_norm": 1.285310561114578,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 12310
+    },
+    {
+      "epoch": 0.12311,
+      "grad_norm": 1.2794706200625143,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 12311
+    },
+    {
+      "epoch": 0.12312,
+      "grad_norm": 1.0817853226112992,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 12312
+    },
+    {
+      "epoch": 0.12313,
+      "grad_norm": 1.4834866026044042,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 12313
+    },
+    {
+      "epoch": 0.12314,
+      "grad_norm": 0.9875551326088596,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 12314
+    },
+    {
+      "epoch": 0.12315,
+      "grad_norm": 1.4829417421206113,
+      "learning_rate": 0.003,
+      "loss": 4.0554,
+      "step": 12315
+    },
+    {
+      "epoch": 0.12316,
+      "grad_norm": 1.0408210412276793,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 12316
+    },
+    {
+      "epoch": 0.12317,
+      "grad_norm": 1.4264169348104334,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 12317
+    },
+    {
+      "epoch": 0.12318,
+      "grad_norm": 1.0989074999876036,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 12318
+    },
+    {
+      "epoch": 0.12319,
+      "grad_norm": 1.2243058952350059,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 12319
+    },
+    {
+      "epoch": 0.1232,
+      "grad_norm": 1.0845777556091951,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 12320
+    },
+    {
+      "epoch": 0.12321,
+      "grad_norm": 1.2109554941735854,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 12321
+    },
+    {
+      "epoch": 0.12322,
+      "grad_norm": 1.0902149341547924,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 12322
+    },
+    {
+      "epoch": 0.12323,
+      "grad_norm": 1.207304856681732,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 12323
+    },
+    {
+      "epoch": 0.12324,
+      "grad_norm": 1.1095092917775558,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 12324
+    },
+    {
+      "epoch": 0.12325,
+      "grad_norm": 1.2357130353809473,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 12325
+    },
+    {
+      "epoch": 0.12326,
+      "grad_norm": 1.0963499157957226,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 12326
+    },
+    {
+      "epoch": 0.12327,
+      "grad_norm": 1.340824324855678,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 12327
+    },
+    {
+      "epoch": 0.12328,
+      "grad_norm": 1.2449364637709306,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 12328
+    },
+    {
+      "epoch": 0.12329,
+      "grad_norm": 1.1876948176946969,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 12329
+    },
+    {
+      "epoch": 0.1233,
+      "grad_norm": 1.1120658251348128,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 12330
+    },
+    {
+      "epoch": 0.12331,
+      "grad_norm": 1.2766533808324558,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 12331
+    },
+    {
+      "epoch": 0.12332,
+      "grad_norm": 1.0419909057814578,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 12332
+    },
+    {
+      "epoch": 0.12333,
+      "grad_norm": 1.2900410395289634,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 12333
+    },
+    {
+      "epoch": 0.12334,
+      "grad_norm": 1.194625300357013,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 12334
+    },
+    {
+      "epoch": 0.12335,
+      "grad_norm": 1.3193993205603087,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 12335
+    },
+    {
+      "epoch": 0.12336,
+      "grad_norm": 1.341608532912959,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 12336
+    },
+    {
+      "epoch": 0.12337,
+      "grad_norm": 1.1486692164623977,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 12337
+    },
+    {
+      "epoch": 0.12338,
+      "grad_norm": 1.2242326996905912,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 12338
+    },
+    {
+      "epoch": 0.12339,
+      "grad_norm": 1.0095451474374366,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 12339
+    },
+    {
+      "epoch": 0.1234,
+      "grad_norm": 1.3942157585638366,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 12340
+    },
+    {
+      "epoch": 0.12341,
+      "grad_norm": 0.9436108135164665,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 12341
+    },
+    {
+      "epoch": 0.12342,
+      "grad_norm": 1.1302393729306983,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 12342
+    },
+    {
+      "epoch": 0.12343,
+      "grad_norm": 1.3295607670812282,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 12343
+    },
+    {
+      "epoch": 0.12344,
+      "grad_norm": 1.194555142662113,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 12344
+    },
+    {
+      "epoch": 0.12345,
+      "grad_norm": 1.3101897952776536,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 12345
+    },
+    {
+      "epoch": 0.12346,
+      "grad_norm": 0.9886687025758016,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 12346
+    },
+    {
+      "epoch": 0.12347,
+      "grad_norm": 1.048339053066772,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 12347
+    },
+    {
+      "epoch": 0.12348,
+      "grad_norm": 1.384508645610422,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 12348
+    },
+    {
+      "epoch": 0.12349,
+      "grad_norm": 1.0486243460691573,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 12349
+    },
+    {
+      "epoch": 0.1235,
+      "grad_norm": 1.2688673285137406,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 12350
+    },
+    {
+      "epoch": 0.12351,
+      "grad_norm": 0.9688609196568242,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 12351
+    },
+    {
+      "epoch": 0.12352,
+      "grad_norm": 1.3053088708802922,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 12352
+    },
+    {
+      "epoch": 0.12353,
+      "grad_norm": 1.012117558257402,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 12353
+    },
+    {
+      "epoch": 0.12354,
+      "grad_norm": 1.4775072585954483,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 12354
+    },
+    {
+      "epoch": 0.12355,
+      "grad_norm": 0.9673416007518341,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 12355
+    },
+    {
+      "epoch": 0.12356,
+      "grad_norm": 1.1498611705148574,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 12356
+    },
+    {
+      "epoch": 0.12357,
+      "grad_norm": 0.9580734048464713,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 12357
+    },
+    {
+      "epoch": 0.12358,
+      "grad_norm": 1.3371631264382817,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 12358
+    },
+    {
+      "epoch": 0.12359,
+      "grad_norm": 1.139746107919948,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 12359
+    },
+    {
+      "epoch": 0.1236,
+      "grad_norm": 1.2004566124253264,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 12360
+    },
+    {
+      "epoch": 0.12361,
+      "grad_norm": 1.1116903585599232,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 12361
+    },
+    {
+      "epoch": 0.12362,
+      "grad_norm": 1.0685972050756347,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 12362
+    },
+    {
+      "epoch": 0.12363,
+      "grad_norm": 1.0483608668790687,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 12363
+    },
+    {
+      "epoch": 0.12364,
+      "grad_norm": 1.4918723329058845,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 12364
+    },
+    {
+      "epoch": 0.12365,
+      "grad_norm": 1.0178238571752845,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 12365
+    },
+    {
+      "epoch": 0.12366,
+      "grad_norm": 1.3030151120524025,
+      "learning_rate": 0.003,
+      "loss": 4.0841,
+      "step": 12366
+    },
+    {
+      "epoch": 0.12367,
+      "grad_norm": 1.141857602951121,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 12367
+    },
+    {
+      "epoch": 0.12368,
+      "grad_norm": 1.2565604375779478,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 12368
+    },
+    {
+      "epoch": 0.12369,
+      "grad_norm": 1.214027777384337,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 12369
+    },
+    {
+      "epoch": 0.1237,
+      "grad_norm": 1.0915755444135937,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 12370
+    },
+    {
+      "epoch": 0.12371,
+      "grad_norm": 1.1867414654709663,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 12371
+    },
+    {
+      "epoch": 0.12372,
+      "grad_norm": 1.0546431117703352,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 12372
+    },
+    {
+      "epoch": 0.12373,
+      "grad_norm": 1.4647953108218288,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 12373
+    },
+    {
+      "epoch": 0.12374,
+      "grad_norm": 1.04762106343006,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 12374
+    },
+    {
+      "epoch": 0.12375,
+      "grad_norm": 1.1963079339111795,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 12375
+    },
+    {
+      "epoch": 0.12376,
+      "grad_norm": 1.1499302069922561,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 12376
+    },
+    {
+      "epoch": 0.12377,
+      "grad_norm": 1.0968516310925092,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 12377
+    },
+    {
+      "epoch": 0.12378,
+      "grad_norm": 1.385819083799469,
+      "learning_rate": 0.003,
+      "loss": 4.0597,
+      "step": 12378
+    },
+    {
+      "epoch": 0.12379,
+      "grad_norm": 1.1485024350100335,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 12379
+    },
+    {
+      "epoch": 0.1238,
+      "grad_norm": 1.0950675066447881,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 12380
+    },
+    {
+      "epoch": 0.12381,
+      "grad_norm": 1.1638382018028037,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 12381
+    },
+    {
+      "epoch": 0.12382,
+      "grad_norm": 1.129457787007263,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 12382
+    },
+    {
+      "epoch": 0.12383,
+      "grad_norm": 1.1036814048890862,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 12383
+    },
+    {
+      "epoch": 0.12384,
+      "grad_norm": 1.2376100778668477,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 12384
+    },
+    {
+      "epoch": 0.12385,
+      "grad_norm": 1.1084913141813695,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 12385
+    },
+    {
+      "epoch": 0.12386,
+      "grad_norm": 1.178199736592679,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 12386
+    },
+    {
+      "epoch": 0.12387,
+      "grad_norm": 1.1779043826583042,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 12387
+    },
+    {
+      "epoch": 0.12388,
+      "grad_norm": 1.2916996558932137,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 12388
+    },
+    {
+      "epoch": 0.12389,
+      "grad_norm": 1.0404310442375322,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 12389
+    },
+    {
+      "epoch": 0.1239,
+      "grad_norm": 1.2897160473158022,
+      "learning_rate": 0.003,
+      "loss": 4.0632,
+      "step": 12390
+    },
+    {
+      "epoch": 0.12391,
+      "grad_norm": 1.1756647311322306,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 12391
+    },
+    {
+      "epoch": 0.12392,
+      "grad_norm": 1.1316623880446204,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 12392
+    },
+    {
+      "epoch": 0.12393,
+      "grad_norm": 1.238766697242648,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 12393
+    },
+    {
+      "epoch": 0.12394,
+      "grad_norm": 1.088911073223708,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 12394
+    },
+    {
+      "epoch": 0.12395,
+      "grad_norm": 1.1453594979436852,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 12395
+    },
+    {
+      "epoch": 0.12396,
+      "grad_norm": 1.2257856525238398,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 12396
+    },
+    {
+      "epoch": 0.12397,
+      "grad_norm": 1.1932329492927123,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 12397
+    },
+    {
+      "epoch": 0.12398,
+      "grad_norm": 1.263429821271588,
+      "learning_rate": 0.003,
+      "loss": 4.0542,
+      "step": 12398
+    },
+    {
+      "epoch": 0.12399,
+      "grad_norm": 1.1410816377835378,
+      "learning_rate": 0.003,
+      "loss": 4.0549,
+      "step": 12399
+    },
+    {
+      "epoch": 0.124,
+      "grad_norm": 1.2684277262143193,
+      "learning_rate": 0.003,
+      "loss": 4.0717,
+      "step": 12400
+    },
+    {
+      "epoch": 0.12401,
+      "grad_norm": 1.3163035517059634,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 12401
+    },
+    {
+      "epoch": 0.12402,
+      "grad_norm": 1.0710840595995579,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 12402
+    },
+    {
+      "epoch": 0.12403,
+      "grad_norm": 1.3799174340236744,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 12403
+    },
+    {
+      "epoch": 0.12404,
+      "grad_norm": 0.9632650030245177,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 12404
+    },
+    {
+      "epoch": 0.12405,
+      "grad_norm": 1.5065254113151083,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 12405
+    },
+    {
+      "epoch": 0.12406,
+      "grad_norm": 1.2014138932290637,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 12406
+    },
+    {
+      "epoch": 0.12407,
+      "grad_norm": 1.3139679781057607,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 12407
+    },
+    {
+      "epoch": 0.12408,
+      "grad_norm": 0.9751484488901144,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 12408
+    },
+    {
+      "epoch": 0.12409,
+      "grad_norm": 1.3002152359013281,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 12409
+    },
+    {
+      "epoch": 0.1241,
+      "grad_norm": 1.0966298279249171,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 12410
+    },
+    {
+      "epoch": 0.12411,
+      "grad_norm": 1.2426091821974081,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 12411
+    },
+    {
+      "epoch": 0.12412,
+      "grad_norm": 1.176010755554183,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 12412
+    },
+    {
+      "epoch": 0.12413,
+      "grad_norm": 1.286184335849363,
+      "learning_rate": 0.003,
+      "loss": 4.0533,
+      "step": 12413
+    },
+    {
+      "epoch": 0.12414,
+      "grad_norm": 1.1895946884822244,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 12414
+    },
+    {
+      "epoch": 0.12415,
+      "grad_norm": 1.1707059442835113,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 12415
+    },
+    {
+      "epoch": 0.12416,
+      "grad_norm": 0.9363912878676401,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 12416
+    },
+    {
+      "epoch": 0.12417,
+      "grad_norm": 1.046953318675986,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 12417
+    },
+    {
+      "epoch": 0.12418,
+      "grad_norm": 1.317720438395889,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 12418
+    },
+    {
+      "epoch": 0.12419,
+      "grad_norm": 1.0657548477063905,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 12419
+    },
+    {
+      "epoch": 0.1242,
+      "grad_norm": 1.1089272329972197,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 12420
+    },
+    {
+      "epoch": 0.12421,
+      "grad_norm": 1.3485884933715704,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 12421
+    },
+    {
+      "epoch": 0.12422,
+      "grad_norm": 1.222601076789822,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 12422
+    },
+    {
+      "epoch": 0.12423,
+      "grad_norm": 1.3597254489602004,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 12423
+    },
+    {
+      "epoch": 0.12424,
+      "grad_norm": 1.1677550597946742,
+      "learning_rate": 0.003,
+      "loss": 3.9886,
+      "step": 12424
+    },
+    {
+      "epoch": 0.12425,
+      "grad_norm": 1.1477029620568557,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 12425
+    },
+    {
+      "epoch": 0.12426,
+      "grad_norm": 1.2385626889982522,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 12426
+    },
+    {
+      "epoch": 0.12427,
+      "grad_norm": 0.9655680600257277,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 12427
+    },
+    {
+      "epoch": 0.12428,
+      "grad_norm": 1.4020376789614961,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 12428
+    },
+    {
+      "epoch": 0.12429,
+      "grad_norm": 1.1747627795440823,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 12429
+    },
+    {
+      "epoch": 0.1243,
+      "grad_norm": 1.3579254765229938,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 12430
+    },
+    {
+      "epoch": 0.12431,
+      "grad_norm": 0.9294601734714742,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 12431
+    },
+    {
+      "epoch": 0.12432,
+      "grad_norm": 1.2390178128721387,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 12432
+    },
+    {
+      "epoch": 0.12433,
+      "grad_norm": 0.9563357941981651,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 12433
+    },
+    {
+      "epoch": 0.12434,
+      "grad_norm": 1.3376711308452687,
+      "learning_rate": 0.003,
+      "loss": 4.0613,
+      "step": 12434
+    },
+    {
+      "epoch": 0.12435,
+      "grad_norm": 1.0536428112043779,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 12435
+    },
+    {
+      "epoch": 0.12436,
+      "grad_norm": 1.30700540627271,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 12436
+    },
+    {
+      "epoch": 0.12437,
+      "grad_norm": 1.1830783159612694,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 12437
+    },
+    {
+      "epoch": 0.12438,
+      "grad_norm": 1.0559360395390067,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 12438
+    },
+    {
+      "epoch": 0.12439,
+      "grad_norm": 1.3789067519353553,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 12439
+    },
+    {
+      "epoch": 0.1244,
+      "grad_norm": 1.0241235840308207,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 12440
+    },
+    {
+      "epoch": 0.12441,
+      "grad_norm": 1.3409960385311457,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 12441
+    },
+    {
+      "epoch": 0.12442,
+      "grad_norm": 0.9732899619983509,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 12442
+    },
+    {
+      "epoch": 0.12443,
+      "grad_norm": 1.2486871041590437,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 12443
+    },
+    {
+      "epoch": 0.12444,
+      "grad_norm": 1.2134843237041595,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 12444
+    },
+    {
+      "epoch": 0.12445,
+      "grad_norm": 1.269281860860668,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 12445
+    },
+    {
+      "epoch": 0.12446,
+      "grad_norm": 1.3188318275217783,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 12446
+    },
+    {
+      "epoch": 0.12447,
+      "grad_norm": 1.1481372478074385,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 12447
+    },
+    {
+      "epoch": 0.12448,
+      "grad_norm": 1.2369333239119498,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 12448
+    },
+    {
+      "epoch": 0.12449,
+      "grad_norm": 1.2694235265392477,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 12449
+    },
+    {
+      "epoch": 0.1245,
+      "grad_norm": 1.1317858601417061,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 12450
+    },
+    {
+      "epoch": 0.12451,
+      "grad_norm": 1.074137700501863,
+      "learning_rate": 0.003,
+      "loss": 3.9826,
+      "step": 12451
+    },
+    {
+      "epoch": 0.12452,
+      "grad_norm": 1.2885266855909137,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 12452
+    },
+    {
+      "epoch": 0.12453,
+      "grad_norm": 1.0396878994563437,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 12453
+    },
+    {
+      "epoch": 0.12454,
+      "grad_norm": 1.4662506221052487,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 12454
+    },
+    {
+      "epoch": 0.12455,
+      "grad_norm": 0.9532945933163989,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 12455
+    },
+    {
+      "epoch": 0.12456,
+      "grad_norm": 1.2584449951050858,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 12456
+    },
+    {
+      "epoch": 0.12457,
+      "grad_norm": 1.1411104987476233,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 12457
+    },
+    {
+      "epoch": 0.12458,
+      "grad_norm": 1.1800010265166672,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 12458
+    },
+    {
+      "epoch": 0.12459,
+      "grad_norm": 1.2595512640292814,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 12459
+    },
+    {
+      "epoch": 0.1246,
+      "grad_norm": 1.1244644633947707,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 12460
+    },
+    {
+      "epoch": 0.12461,
+      "grad_norm": 1.4047730844954986,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 12461
+    },
+    {
+      "epoch": 0.12462,
+      "grad_norm": 1.065955962651246,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 12462
+    },
+    {
+      "epoch": 0.12463,
+      "grad_norm": 1.23712644506417,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 12463
+    },
+    {
+      "epoch": 0.12464,
+      "grad_norm": 1.048205712021745,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 12464
+    },
+    {
+      "epoch": 0.12465,
+      "grad_norm": 1.2119882374602933,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 12465
+    },
+    {
+      "epoch": 0.12466,
+      "grad_norm": 1.0136502432256684,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 12466
+    },
+    {
+      "epoch": 0.12467,
+      "grad_norm": 1.3500074196062515,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 12467
+    },
+    {
+      "epoch": 0.12468,
+      "grad_norm": 1.1123455140298126,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 12468
+    },
+    {
+      "epoch": 0.12469,
+      "grad_norm": 1.1752785623116864,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 12469
+    },
+    {
+      "epoch": 0.1247,
+      "grad_norm": 1.0239397030257342,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 12470
+    },
+    {
+      "epoch": 0.12471,
+      "grad_norm": 1.3169454051060006,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 12471
+    },
+    {
+      "epoch": 0.12472,
+      "grad_norm": 1.076334168779626,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 12472
+    },
+    {
+      "epoch": 0.12473,
+      "grad_norm": 1.1950416553824108,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 12473
+    },
+    {
+      "epoch": 0.12474,
+      "grad_norm": 1.3179792713272818,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 12474
+    },
+    {
+      "epoch": 0.12475,
+      "grad_norm": 1.245756788047074,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 12475
+    },
+    {
+      "epoch": 0.12476,
+      "grad_norm": 1.1900834831639637,
+      "learning_rate": 0.003,
+      "loss": 4.0526,
+      "step": 12476
+    },
+    {
+      "epoch": 0.12477,
+      "grad_norm": 1.2777258402774527,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 12477
+    },
+    {
+      "epoch": 0.12478,
+      "grad_norm": 1.3540644784807185,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 12478
+    },
+    {
+      "epoch": 0.12479,
+      "grad_norm": 1.250859757659591,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 12479
+    },
+    {
+      "epoch": 0.1248,
+      "grad_norm": 1.0011661308076665,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 12480
+    },
+    {
+      "epoch": 0.12481,
+      "grad_norm": 1.0944248906550968,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 12481
+    },
+    {
+      "epoch": 0.12482,
+      "grad_norm": 1.0766623803287114,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 12482
+    },
+    {
+      "epoch": 0.12483,
+      "grad_norm": 1.247377497569275,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 12483
+    },
+    {
+      "epoch": 0.12484,
+      "grad_norm": 1.2590475389724518,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 12484
+    },
+    {
+      "epoch": 0.12485,
+      "grad_norm": 1.2563221922601575,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 12485
+    },
+    {
+      "epoch": 0.12486,
+      "grad_norm": 1.366666755852874,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 12486
+    },
+    {
+      "epoch": 0.12487,
+      "grad_norm": 1.0391130894161864,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 12487
+    },
+    {
+      "epoch": 0.12488,
+      "grad_norm": 1.252739650762261,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 12488
+    },
+    {
+      "epoch": 0.12489,
+      "grad_norm": 1.1169828804543638,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 12489
+    },
+    {
+      "epoch": 0.1249,
+      "grad_norm": 1.1550846912653996,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 12490
+    },
+    {
+      "epoch": 0.12491,
+      "grad_norm": 1.2273757973022745,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 12491
+    },
+    {
+      "epoch": 0.12492,
+      "grad_norm": 1.138029638011022,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 12492
+    },
+    {
+      "epoch": 0.12493,
+      "grad_norm": 1.1633599584548278,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 12493
+    },
+    {
+      "epoch": 0.12494,
+      "grad_norm": 1.1071951401003601,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 12494
+    },
+    {
+      "epoch": 0.12495,
+      "grad_norm": 1.1728962633082556,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 12495
+    },
+    {
+      "epoch": 0.12496,
+      "grad_norm": 1.1412993656234558,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 12496
+    },
+    {
+      "epoch": 0.12497,
+      "grad_norm": 1.1836701462957342,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 12497
+    },
+    {
+      "epoch": 0.12498,
+      "grad_norm": 1.3444553399522738,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 12498
+    },
+    {
+      "epoch": 0.12499,
+      "grad_norm": 1.055299434095236,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 12499
+    },
+    {
+      "epoch": 0.125,
+      "grad_norm": 1.1681452892889812,
+      "learning_rate": 0.003,
+      "loss": 4.0656,
+      "step": 12500
+    },
+    {
+      "epoch": 0.12501,
+      "grad_norm": 1.1566911568476128,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 12501
+    },
+    {
+      "epoch": 0.12502,
+      "grad_norm": 1.2648808509254572,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 12502
+    },
+    {
+      "epoch": 0.12503,
+      "grad_norm": 1.1849013409331248,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 12503
+    },
+    {
+      "epoch": 0.12504,
+      "grad_norm": 1.2885672439504576,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 12504
+    },
+    {
+      "epoch": 0.12505,
+      "grad_norm": 1.1165062827533234,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 12505
+    },
+    {
+      "epoch": 0.12506,
+      "grad_norm": 1.312246783818512,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 12506
+    },
+    {
+      "epoch": 0.12507,
+      "grad_norm": 1.3021217260244191,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 12507
+    },
+    {
+      "epoch": 0.12508,
+      "grad_norm": 1.080380448706609,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 12508
+    },
+    {
+      "epoch": 0.12509,
+      "grad_norm": 1.4439839528646126,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 12509
+    },
+    {
+      "epoch": 0.1251,
+      "grad_norm": 1.0678739097641072,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 12510
+    },
+    {
+      "epoch": 0.12511,
+      "grad_norm": 1.239905906705609,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 12511
+    },
+    {
+      "epoch": 0.12512,
+      "grad_norm": 1.0887393820962412,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 12512
+    },
+    {
+      "epoch": 0.12513,
+      "grad_norm": 1.1044326141571423,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 12513
+    },
+    {
+      "epoch": 0.12514,
+      "grad_norm": 1.296916204033871,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 12514
+    },
+    {
+      "epoch": 0.12515,
+      "grad_norm": 1.1111976158805856,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 12515
+    },
+    {
+      "epoch": 0.12516,
+      "grad_norm": 1.4345634992074552,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 12516
+    },
+    {
+      "epoch": 0.12517,
+      "grad_norm": 1.0038106730731644,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 12517
+    },
+    {
+      "epoch": 0.12518,
+      "grad_norm": 1.233815776710949,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 12518
+    },
+    {
+      "epoch": 0.12519,
+      "grad_norm": 1.0644914590833376,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 12519
+    },
+    {
+      "epoch": 0.1252,
+      "grad_norm": 1.1510047747949075,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 12520
+    },
+    {
+      "epoch": 0.12521,
+      "grad_norm": 1.2458741278187768,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 12521
+    },
+    {
+      "epoch": 0.12522,
+      "grad_norm": 1.2658284858594022,
+      "learning_rate": 0.003,
+      "loss": 4.0664,
+      "step": 12522
+    },
+    {
+      "epoch": 0.12523,
+      "grad_norm": 1.0684459848883463,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 12523
+    },
+    {
+      "epoch": 0.12524,
+      "grad_norm": 1.1841532183619132,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 12524
+    },
+    {
+      "epoch": 0.12525,
+      "grad_norm": 1.2159923676158797,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 12525
+    },
+    {
+      "epoch": 0.12526,
+      "grad_norm": 1.3025781911250225,
+      "learning_rate": 0.003,
+      "loss": 4.0562,
+      "step": 12526
+    },
+    {
+      "epoch": 0.12527,
+      "grad_norm": 1.0656354531638161,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 12527
+    },
+    {
+      "epoch": 0.12528,
+      "grad_norm": 1.5808452423937671,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 12528
+    },
+    {
+      "epoch": 0.12529,
+      "grad_norm": 1.1645359316338186,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 12529
+    },
+    {
+      "epoch": 0.1253,
+      "grad_norm": 1.0732087825270942,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 12530
+    },
+    {
+      "epoch": 0.12531,
+      "grad_norm": 1.1743639789562694,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 12531
+    },
+    {
+      "epoch": 0.12532,
+      "grad_norm": 1.1258543261386147,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 12532
+    },
+    {
+      "epoch": 0.12533,
+      "grad_norm": 1.2074492118607754,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 12533
+    },
+    {
+      "epoch": 0.12534,
+      "grad_norm": 1.1335294582199398,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 12534
+    },
+    {
+      "epoch": 0.12535,
+      "grad_norm": 1.393436023522577,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 12535
+    },
+    {
+      "epoch": 0.12536,
+      "grad_norm": 0.8943166326444245,
+      "learning_rate": 0.003,
+      "loss": 3.9654,
+      "step": 12536
+    },
+    {
+      "epoch": 0.12537,
+      "grad_norm": 1.0287110754049926,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 12537
+    },
+    {
+      "epoch": 0.12538,
+      "grad_norm": 1.255488963677731,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 12538
+    },
+    {
+      "epoch": 0.12539,
+      "grad_norm": 1.0084876103414888,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 12539
+    },
+    {
+      "epoch": 0.1254,
+      "grad_norm": 1.4860849068921025,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 12540
+    },
+    {
+      "epoch": 0.12541,
+      "grad_norm": 0.9581597732122125,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 12541
+    },
+    {
+      "epoch": 0.12542,
+      "grad_norm": 1.4035696364661694,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 12542
+    },
+    {
+      "epoch": 0.12543,
+      "grad_norm": 1.0541027313016913,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 12543
+    },
+    {
+      "epoch": 0.12544,
+      "grad_norm": 1.1821544967346649,
+      "learning_rate": 0.003,
+      "loss": 4.0607,
+      "step": 12544
+    },
+    {
+      "epoch": 0.12545,
+      "grad_norm": 1.2469621369638744,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 12545
+    },
+    {
+      "epoch": 0.12546,
+      "grad_norm": 1.4233370848984432,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 12546
+    },
+    {
+      "epoch": 0.12547,
+      "grad_norm": 1.0233959256284908,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 12547
+    },
+    {
+      "epoch": 0.12548,
+      "grad_norm": 1.242054122735897,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 12548
+    },
+    {
+      "epoch": 0.12549,
+      "grad_norm": 1.1070600937614252,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 12549
+    },
+    {
+      "epoch": 0.1255,
+      "grad_norm": 1.1087147779990503,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 12550
+    },
+    {
+      "epoch": 0.12551,
+      "grad_norm": 1.1665980055570004,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 12551
+    },
+    {
+      "epoch": 0.12552,
+      "grad_norm": 1.1086804659085676,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 12552
+    },
+    {
+      "epoch": 0.12553,
+      "grad_norm": 1.3121353361094357,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 12553
+    },
+    {
+      "epoch": 0.12554,
+      "grad_norm": 1.0719336426573771,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 12554
+    },
+    {
+      "epoch": 0.12555,
+      "grad_norm": 1.1223239153580142,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 12555
+    },
+    {
+      "epoch": 0.12556,
+      "grad_norm": 1.3267498467952836,
+      "learning_rate": 0.003,
+      "loss": 4.0551,
+      "step": 12556
+    },
+    {
+      "epoch": 0.12557,
+      "grad_norm": 1.3268383865328555,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 12557
+    },
+    {
+      "epoch": 0.12558,
+      "grad_norm": 1.4931899187677369,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 12558
+    },
+    {
+      "epoch": 0.12559,
+      "grad_norm": 1.053612760407219,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 12559
+    },
+    {
+      "epoch": 0.1256,
+      "grad_norm": 1.3211965283864615,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 12560
+    },
+    {
+      "epoch": 0.12561,
+      "grad_norm": 1.2521128264959986,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 12561
+    },
+    {
+      "epoch": 0.12562,
+      "grad_norm": 1.3252559218711522,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 12562
+    },
+    {
+      "epoch": 0.12563,
+      "grad_norm": 1.1034688777336168,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 12563
+    },
+    {
+      "epoch": 0.12564,
+      "grad_norm": 1.245795847005279,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 12564
+    },
+    {
+      "epoch": 0.12565,
+      "grad_norm": 1.0799959151676444,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 12565
+    },
+    {
+      "epoch": 0.12566,
+      "grad_norm": 1.1399011458856696,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 12566
+    },
+    {
+      "epoch": 0.12567,
+      "grad_norm": 1.1637302807696905,
+      "learning_rate": 0.003,
+      "loss": 4.0628,
+      "step": 12567
+    },
+    {
+      "epoch": 0.12568,
+      "grad_norm": 1.3881251582640086,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 12568
+    },
+    {
+      "epoch": 0.12569,
+      "grad_norm": 0.9148944974251467,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 12569
+    },
+    {
+      "epoch": 0.1257,
+      "grad_norm": 1.3720674020991435,
+      "learning_rate": 0.003,
+      "loss": 4.0716,
+      "step": 12570
+    },
+    {
+      "epoch": 0.12571,
+      "grad_norm": 1.1589754593303436,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 12571
+    },
+    {
+      "epoch": 0.12572,
+      "grad_norm": 1.288935836145991,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 12572
+    },
+    {
+      "epoch": 0.12573,
+      "grad_norm": 1.3761421534077674,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 12573
+    },
+    {
+      "epoch": 0.12574,
+      "grad_norm": 1.0527095557807116,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 12574
+    },
+    {
+      "epoch": 0.12575,
+      "grad_norm": 1.2462154382753858,
+      "learning_rate": 0.003,
+      "loss": 4.0664,
+      "step": 12575
+    },
+    {
+      "epoch": 0.12576,
+      "grad_norm": 1.1767019385647244,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 12576
+    },
+    {
+      "epoch": 0.12577,
+      "grad_norm": 1.2310642908036986,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 12577
+    },
+    {
+      "epoch": 0.12578,
+      "grad_norm": 1.0681347288973733,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 12578
+    },
+    {
+      "epoch": 0.12579,
+      "grad_norm": 1.200121848817965,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 12579
+    },
+    {
+      "epoch": 0.1258,
+      "grad_norm": 1.0547714047591237,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 12580
+    },
+    {
+      "epoch": 0.12581,
+      "grad_norm": 1.399432745523962,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 12581
+    },
+    {
+      "epoch": 0.12582,
+      "grad_norm": 0.9891791775910421,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 12582
+    },
+    {
+      "epoch": 0.12583,
+      "grad_norm": 1.2865457649563121,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 12583
+    },
+    {
+      "epoch": 0.12584,
+      "grad_norm": 1.0318108268908344,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 12584
+    },
+    {
+      "epoch": 0.12585,
+      "grad_norm": 1.2833427957706554,
+      "learning_rate": 0.003,
+      "loss": 4.0588,
+      "step": 12585
+    },
+    {
+      "epoch": 0.12586,
+      "grad_norm": 1.093722855738362,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 12586
+    },
+    {
+      "epoch": 0.12587,
+      "grad_norm": 1.1534754913393301,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 12587
+    },
+    {
+      "epoch": 0.12588,
+      "grad_norm": 1.3439763061886305,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 12588
+    },
+    {
+      "epoch": 0.12589,
+      "grad_norm": 1.2091801981891621,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 12589
+    },
+    {
+      "epoch": 0.1259,
+      "grad_norm": 1.1011477287499383,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 12590
+    },
+    {
+      "epoch": 0.12591,
+      "grad_norm": 1.1312006138545256,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 12591
+    },
+    {
+      "epoch": 0.12592,
+      "grad_norm": 1.1523264528504664,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 12592
+    },
+    {
+      "epoch": 0.12593,
+      "grad_norm": 1.255533301220495,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 12593
+    },
+    {
+      "epoch": 0.12594,
+      "grad_norm": 1.2058516957260115,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 12594
+    },
+    {
+      "epoch": 0.12595,
+      "grad_norm": 0.9821636958326797,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 12595
+    },
+    {
+      "epoch": 0.12596,
+      "grad_norm": 1.2542787438402092,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 12596
+    },
+    {
+      "epoch": 0.12597,
+      "grad_norm": 1.2121687973888942,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 12597
+    },
+    {
+      "epoch": 0.12598,
+      "grad_norm": 1.3235298233111423,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 12598
+    },
+    {
+      "epoch": 0.12599,
+      "grad_norm": 1.000802003255851,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 12599
+    },
+    {
+      "epoch": 0.126,
+      "grad_norm": 1.2650303129189735,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 12600
+    },
+    {
+      "epoch": 0.12601,
+      "grad_norm": 1.2518624621845913,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 12601
+    },
+    {
+      "epoch": 0.12602,
+      "grad_norm": 1.4759094497892915,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 12602
+    },
+    {
+      "epoch": 0.12603,
+      "grad_norm": 1.2729425663847465,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 12603
+    },
+    {
+      "epoch": 0.12604,
+      "grad_norm": 1.191215597044836,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 12604
+    },
+    {
+      "epoch": 0.12605,
+      "grad_norm": 1.2346027461305622,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 12605
+    },
+    {
+      "epoch": 0.12606,
+      "grad_norm": 1.0890178656815974,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 12606
+    },
+    {
+      "epoch": 0.12607,
+      "grad_norm": 1.2208264878432329,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 12607
+    },
+    {
+      "epoch": 0.12608,
+      "grad_norm": 1.0725139583720165,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 12608
+    },
+    {
+      "epoch": 0.12609,
+      "grad_norm": 1.0438403055998695,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 12609
+    },
+    {
+      "epoch": 0.1261,
+      "grad_norm": 1.423012084634035,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 12610
+    },
+    {
+      "epoch": 0.12611,
+      "grad_norm": 0.9883128164819032,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 12611
+    },
+    {
+      "epoch": 0.12612,
+      "grad_norm": 1.4515971588087238,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 12612
+    },
+    {
+      "epoch": 0.12613,
+      "grad_norm": 0.9323036378252141,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 12613
+    },
+    {
+      "epoch": 0.12614,
+      "grad_norm": 1.3119744482601774,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 12614
+    },
+    {
+      "epoch": 0.12615,
+      "grad_norm": 1.3243048548778869,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 12615
+    },
+    {
+      "epoch": 0.12616,
+      "grad_norm": 1.0687568535775176,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 12616
+    },
+    {
+      "epoch": 0.12617,
+      "grad_norm": 1.2329522629931724,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 12617
+    },
+    {
+      "epoch": 0.12618,
+      "grad_norm": 1.2447679099075022,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 12618
+    },
+    {
+      "epoch": 0.12619,
+      "grad_norm": 1.1574700071462647,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 12619
+    },
+    {
+      "epoch": 0.1262,
+      "grad_norm": 1.127223199736362,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 12620
+    },
+    {
+      "epoch": 0.12621,
+      "grad_norm": 1.2178554897602358,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 12621
+    },
+    {
+      "epoch": 0.12622,
+      "grad_norm": 1.5252586533504058,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 12622
+    },
+    {
+      "epoch": 0.12623,
+      "grad_norm": 0.9953370520103634,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 12623
+    },
+    {
+      "epoch": 0.12624,
+      "grad_norm": 1.2288885934582412,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 12624
+    },
+    {
+      "epoch": 0.12625,
+      "grad_norm": 1.2039239879403167,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 12625
+    },
+    {
+      "epoch": 0.12626,
+      "grad_norm": 1.0854130128519532,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 12626
+    },
+    {
+      "epoch": 0.12627,
+      "grad_norm": 1.1745590891614175,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 12627
+    },
+    {
+      "epoch": 0.12628,
+      "grad_norm": 1.1566678215804154,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 12628
+    },
+    {
+      "epoch": 0.12629,
+      "grad_norm": 1.233599095010633,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 12629
+    },
+    {
+      "epoch": 0.1263,
+      "grad_norm": 1.270380200195146,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 12630
+    },
+    {
+      "epoch": 0.12631,
+      "grad_norm": 1.1062440334986967,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 12631
+    },
+    {
+      "epoch": 0.12632,
+      "grad_norm": 1.3055152614095051,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 12632
+    },
+    {
+      "epoch": 0.12633,
+      "grad_norm": 1.0713326524662674,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 12633
+    },
+    {
+      "epoch": 0.12634,
+      "grad_norm": 1.328625658674953,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 12634
+    },
+    {
+      "epoch": 0.12635,
+      "grad_norm": 0.9992586114619201,
+      "learning_rate": 0.003,
+      "loss": 4.0587,
+      "step": 12635
+    },
+    {
+      "epoch": 0.12636,
+      "grad_norm": 1.4006756734928287,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 12636
+    },
+    {
+      "epoch": 0.12637,
+      "grad_norm": 1.0884890054982994,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 12637
+    },
+    {
+      "epoch": 0.12638,
+      "grad_norm": 1.2337201777333826,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 12638
+    },
+    {
+      "epoch": 0.12639,
+      "grad_norm": 1.1474779657673895,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 12639
+    },
+    {
+      "epoch": 0.1264,
+      "grad_norm": 1.2550079297749897,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 12640
+    },
+    {
+      "epoch": 0.12641,
+      "grad_norm": 1.2570253479523554,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 12641
+    },
+    {
+      "epoch": 0.12642,
+      "grad_norm": 1.2545340832026206,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 12642
+    },
+    {
+      "epoch": 0.12643,
+      "grad_norm": 1.1479312545047873,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 12643
+    },
+    {
+      "epoch": 0.12644,
+      "grad_norm": 1.3820539223152442,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 12644
+    },
+    {
+      "epoch": 0.12645,
+      "grad_norm": 1.0105513446878904,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 12645
+    },
+    {
+      "epoch": 0.12646,
+      "grad_norm": 1.2048119372599568,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 12646
+    },
+    {
+      "epoch": 0.12647,
+      "grad_norm": 1.010959049838853,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 12647
+    },
+    {
+      "epoch": 0.12648,
+      "grad_norm": 1.2821540434209036,
+      "learning_rate": 0.003,
+      "loss": 4.0595,
+      "step": 12648
+    },
+    {
+      "epoch": 0.12649,
+      "grad_norm": 1.0810220087640319,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 12649
+    },
+    {
+      "epoch": 0.1265,
+      "grad_norm": 1.204458455749565,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 12650
+    },
+    {
+      "epoch": 0.12651,
+      "grad_norm": 0.9792762762076124,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 12651
+    },
+    {
+      "epoch": 0.12652,
+      "grad_norm": 1.5682234906370067,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 12652
+    },
+    {
+      "epoch": 0.12653,
+      "grad_norm": 0.8164817667847991,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 12653
+    },
+    {
+      "epoch": 0.12654,
+      "grad_norm": 1.2008138076892603,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 12654
+    },
+    {
+      "epoch": 0.12655,
+      "grad_norm": 1.2201295724632517,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 12655
+    },
+    {
+      "epoch": 0.12656,
+      "grad_norm": 1.3418859859435595,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 12656
+    },
+    {
+      "epoch": 0.12657,
+      "grad_norm": 1.2531819255827479,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 12657
+    },
+    {
+      "epoch": 0.12658,
+      "grad_norm": 1.2676038809684318,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 12658
+    },
+    {
+      "epoch": 0.12659,
+      "grad_norm": 1.0091691228634534,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 12659
+    },
+    {
+      "epoch": 0.1266,
+      "grad_norm": 1.2238608274423066,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 12660
+    },
+    {
+      "epoch": 0.12661,
+      "grad_norm": 0.9105821714736707,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 12661
+    },
+    {
+      "epoch": 0.12662,
+      "grad_norm": 1.2446462516408656,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 12662
+    },
+    {
+      "epoch": 0.12663,
+      "grad_norm": 1.3444189852230655,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 12663
+    },
+    {
+      "epoch": 0.12664,
+      "grad_norm": 1.215579372076641,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 12664
+    },
+    {
+      "epoch": 0.12665,
+      "grad_norm": 1.1894756167487517,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 12665
+    },
+    {
+      "epoch": 0.12666,
+      "grad_norm": 1.0796402434304548,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 12666
+    },
+    {
+      "epoch": 0.12667,
+      "grad_norm": 1.2729192473905775,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 12667
+    },
+    {
+      "epoch": 0.12668,
+      "grad_norm": 1.148234082366681,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 12668
+    },
+    {
+      "epoch": 0.12669,
+      "grad_norm": 1.1418967246731544,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 12669
+    },
+    {
+      "epoch": 0.1267,
+      "grad_norm": 1.2700756456120366,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 12670
+    },
+    {
+      "epoch": 0.12671,
+      "grad_norm": 1.0764358520395991,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 12671
+    },
+    {
+      "epoch": 0.12672,
+      "grad_norm": 1.409785311657584,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 12672
+    },
+    {
+      "epoch": 0.12673,
+      "grad_norm": 1.1401916417528815,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 12673
+    },
+    {
+      "epoch": 0.12674,
+      "grad_norm": 1.1669071703585918,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 12674
+    },
+    {
+      "epoch": 0.12675,
+      "grad_norm": 1.1980384256902983,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 12675
+    },
+    {
+      "epoch": 0.12676,
+      "grad_norm": 1.2130331742254143,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 12676
+    },
+    {
+      "epoch": 0.12677,
+      "grad_norm": 1.1165493248220288,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 12677
+    },
+    {
+      "epoch": 0.12678,
+      "grad_norm": 1.3433766034805201,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 12678
+    },
+    {
+      "epoch": 0.12679,
+      "grad_norm": 1.1252445488214073,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 12679
+    },
+    {
+      "epoch": 0.1268,
+      "grad_norm": 1.0754874878584735,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 12680
+    },
+    {
+      "epoch": 0.12681,
+      "grad_norm": 1.1485296840321575,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 12681
+    },
+    {
+      "epoch": 0.12682,
+      "grad_norm": 1.1760583571845962,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 12682
+    },
+    {
+      "epoch": 0.12683,
+      "grad_norm": 1.0880121539775605,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 12683
+    },
+    {
+      "epoch": 0.12684,
+      "grad_norm": 1.3040924045912328,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 12684
+    },
+    {
+      "epoch": 0.12685,
+      "grad_norm": 1.1876837315600164,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 12685
+    },
+    {
+      "epoch": 0.12686,
+      "grad_norm": 1.3387931565528755,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 12686
+    },
+    {
+      "epoch": 0.12687,
+      "grad_norm": 1.0300693277032922,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 12687
+    },
+    {
+      "epoch": 0.12688,
+      "grad_norm": 1.297604962549656,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 12688
+    },
+    {
+      "epoch": 0.12689,
+      "grad_norm": 1.100287623472564,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 12689
+    },
+    {
+      "epoch": 0.1269,
+      "grad_norm": 1.2595777690018868,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 12690
+    },
+    {
+      "epoch": 0.12691,
+      "grad_norm": 1.1988396498532445,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 12691
+    },
+    {
+      "epoch": 0.12692,
+      "grad_norm": 1.130646827975255,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 12692
+    },
+    {
+      "epoch": 0.12693,
+      "grad_norm": 1.1694215564141754,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 12693
+    },
+    {
+      "epoch": 0.12694,
+      "grad_norm": 1.2250507543257125,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 12694
+    },
+    {
+      "epoch": 0.12695,
+      "grad_norm": 1.205915660828806,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 12695
+    },
+    {
+      "epoch": 0.12696,
+      "grad_norm": 1.2956504501050932,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 12696
+    },
+    {
+      "epoch": 0.12697,
+      "grad_norm": 1.0210621754158544,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 12697
+    },
+    {
+      "epoch": 0.12698,
+      "grad_norm": 1.2599085606308953,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 12698
+    },
+    {
+      "epoch": 0.12699,
+      "grad_norm": 1.2736017966213373,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 12699
+    },
+    {
+      "epoch": 0.127,
+      "grad_norm": 1.069125521043092,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 12700
+    },
+    {
+      "epoch": 0.12701,
+      "grad_norm": 1.1512390118258113,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 12701
+    },
+    {
+      "epoch": 0.12702,
+      "grad_norm": 1.2300830756231789,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 12702
+    },
+    {
+      "epoch": 0.12703,
+      "grad_norm": 1.3934301726307217,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 12703
+    },
+    {
+      "epoch": 0.12704,
+      "grad_norm": 1.0242474018081544,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 12704
+    },
+    {
+      "epoch": 0.12705,
+      "grad_norm": 1.331495197064204,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 12705
+    },
+    {
+      "epoch": 0.12706,
+      "grad_norm": 1.1882006045530609,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 12706
+    },
+    {
+      "epoch": 0.12707,
+      "grad_norm": 1.2945057105662945,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 12707
+    },
+    {
+      "epoch": 0.12708,
+      "grad_norm": 1.1382027832201964,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 12708
+    },
+    {
+      "epoch": 0.12709,
+      "grad_norm": 1.4619644889251888,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 12709
+    },
+    {
+      "epoch": 0.1271,
+      "grad_norm": 0.8937466524403883,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 12710
+    },
+    {
+      "epoch": 0.12711,
+      "grad_norm": 1.0864349938474558,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 12711
+    },
+    {
+      "epoch": 0.12712,
+      "grad_norm": 1.5497917839594966,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 12712
+    },
+    {
+      "epoch": 0.12713,
+      "grad_norm": 1.183683442872549,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 12713
+    },
+    {
+      "epoch": 0.12714,
+      "grad_norm": 1.3295079655733,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 12714
+    },
+    {
+      "epoch": 0.12715,
+      "grad_norm": 0.9693033253470243,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 12715
+    },
+    {
+      "epoch": 0.12716,
+      "grad_norm": 1.3432193381885342,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 12716
+    },
+    {
+      "epoch": 0.12717,
+      "grad_norm": 1.128924336563658,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 12717
+    },
+    {
+      "epoch": 0.12718,
+      "grad_norm": 1.2072489511705236,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 12718
+    },
+    {
+      "epoch": 0.12719,
+      "grad_norm": 1.2789377715316639,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 12719
+    },
+    {
+      "epoch": 0.1272,
+      "grad_norm": 1.1420326588382799,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 12720
+    },
+    {
+      "epoch": 0.12721,
+      "grad_norm": 1.1654815751718461,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 12721
+    },
+    {
+      "epoch": 0.12722,
+      "grad_norm": 1.4278123071241624,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 12722
+    },
+    {
+      "epoch": 0.12723,
+      "grad_norm": 1.0738923650659709,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 12723
+    },
+    {
+      "epoch": 0.12724,
+      "grad_norm": 1.3422297872267799,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 12724
+    },
+    {
+      "epoch": 0.12725,
+      "grad_norm": 0.9996258275269408,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 12725
+    },
+    {
+      "epoch": 0.12726,
+      "grad_norm": 1.310472670505521,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 12726
+    },
+    {
+      "epoch": 0.12727,
+      "grad_norm": 0.9766609124996919,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 12727
+    },
+    {
+      "epoch": 0.12728,
+      "grad_norm": 1.3007863463493645,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 12728
+    },
+    {
+      "epoch": 0.12729,
+      "grad_norm": 1.249819730679953,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 12729
+    },
+    {
+      "epoch": 0.1273,
+      "grad_norm": 1.0103597361587262,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 12730
+    },
+    {
+      "epoch": 0.12731,
+      "grad_norm": 1.3138143045183632,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 12731
+    },
+    {
+      "epoch": 0.12732,
+      "grad_norm": 1.1308795357386041,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 12732
+    },
+    {
+      "epoch": 0.12733,
+      "grad_norm": 1.008945679356882,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 12733
+    },
+    {
+      "epoch": 0.12734,
+      "grad_norm": 1.3302092880616652,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 12734
+    },
+    {
+      "epoch": 0.12735,
+      "grad_norm": 1.264541944097203,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 12735
+    },
+    {
+      "epoch": 0.12736,
+      "grad_norm": 1.1314818993430544,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 12736
+    },
+    {
+      "epoch": 0.12737,
+      "grad_norm": 1.2516386048861714,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 12737
+    },
+    {
+      "epoch": 0.12738,
+      "grad_norm": 1.131131341631946,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 12738
+    },
+    {
+      "epoch": 0.12739,
+      "grad_norm": 1.156530685800842,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 12739
+    },
+    {
+      "epoch": 0.1274,
+      "grad_norm": 1.2694955448613015,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 12740
+    },
+    {
+      "epoch": 0.12741,
+      "grad_norm": 1.1550920436159788,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 12741
+    },
+    {
+      "epoch": 0.12742,
+      "grad_norm": 1.3406628844910542,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 12742
+    },
+    {
+      "epoch": 0.12743,
+      "grad_norm": 1.0580149423492489,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 12743
+    },
+    {
+      "epoch": 0.12744,
+      "grad_norm": 1.5671618568755727,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 12744
+    },
+    {
+      "epoch": 0.12745,
+      "grad_norm": 0.9666293075162874,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 12745
+    },
+    {
+      "epoch": 0.12746,
+      "grad_norm": 1.2172568887775441,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 12746
+    },
+    {
+      "epoch": 0.12747,
+      "grad_norm": 0.9342543557121035,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 12747
+    },
+    {
+      "epoch": 0.12748,
+      "grad_norm": 1.3005333446109317,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 12748
+    },
+    {
+      "epoch": 0.12749,
+      "grad_norm": 1.2021938544731907,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 12749
+    },
+    {
+      "epoch": 0.1275,
+      "grad_norm": 1.2152856263128113,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 12750
+    },
+    {
+      "epoch": 0.12751,
+      "grad_norm": 1.0638444261787958,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 12751
+    },
+    {
+      "epoch": 0.12752,
+      "grad_norm": 1.3389818000295493,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 12752
+    },
+    {
+      "epoch": 0.12753,
+      "grad_norm": 1.270707387228433,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 12753
+    },
+    {
+      "epoch": 0.12754,
+      "grad_norm": 1.13608709343689,
+      "learning_rate": 0.003,
+      "loss": 4.0492,
+      "step": 12754
+    },
+    {
+      "epoch": 0.12755,
+      "grad_norm": 1.3147802356213878,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 12755
+    },
+    {
+      "epoch": 0.12756,
+      "grad_norm": 1.2108384992780414,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 12756
+    },
+    {
+      "epoch": 0.12757,
+      "grad_norm": 1.0753953018228892,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 12757
+    },
+    {
+      "epoch": 0.12758,
+      "grad_norm": 1.7678007404208398,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 12758
+    },
+    {
+      "epoch": 0.12759,
+      "grad_norm": 0.8688622010901259,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 12759
+    },
+    {
+      "epoch": 0.1276,
+      "grad_norm": 1.2116409016348908,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 12760
+    },
+    {
+      "epoch": 0.12761,
+      "grad_norm": 1.254787253319999,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 12761
+    },
+    {
+      "epoch": 0.12762,
+      "grad_norm": 1.1585454805799762,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 12762
+    },
+    {
+      "epoch": 0.12763,
+      "grad_norm": 1.513644571729462,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 12763
+    },
+    {
+      "epoch": 0.12764,
+      "grad_norm": 0.9027207563494971,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 12764
+    },
+    {
+      "epoch": 0.12765,
+      "grad_norm": 1.317176094864067,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 12765
+    },
+    {
+      "epoch": 0.12766,
+      "grad_norm": 1.117017075660199,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 12766
+    },
+    {
+      "epoch": 0.12767,
+      "grad_norm": 1.1414308429206412,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 12767
+    },
+    {
+      "epoch": 0.12768,
+      "grad_norm": 1.2669417304006838,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 12768
+    },
+    {
+      "epoch": 0.12769,
+      "grad_norm": 0.9801818112259263,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 12769
+    },
+    {
+      "epoch": 0.1277,
+      "grad_norm": 1.2263495100618897,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 12770
+    },
+    {
+      "epoch": 0.12771,
+      "grad_norm": 1.1259193359926396,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 12771
+    },
+    {
+      "epoch": 0.12772,
+      "grad_norm": 1.2606850167055357,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 12772
+    },
+    {
+      "epoch": 0.12773,
+      "grad_norm": 1.1914645637275918,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 12773
+    },
+    {
+      "epoch": 0.12774,
+      "grad_norm": 1.110448610605845,
+      "learning_rate": 0.003,
+      "loss": 3.978,
+      "step": 12774
+    },
+    {
+      "epoch": 0.12775,
+      "grad_norm": 1.4574492731231232,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 12775
+    },
+    {
+      "epoch": 0.12776,
+      "grad_norm": 1.2136472323541958,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 12776
+    },
+    {
+      "epoch": 0.12777,
+      "grad_norm": 1.1822253563491354,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 12777
+    },
+    {
+      "epoch": 0.12778,
+      "grad_norm": 1.0520996981570394,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 12778
+    },
+    {
+      "epoch": 0.12779,
+      "grad_norm": 1.2831106263336869,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 12779
+    },
+    {
+      "epoch": 0.1278,
+      "grad_norm": 1.0560172383499893,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 12780
+    },
+    {
+      "epoch": 0.12781,
+      "grad_norm": 1.258889909849877,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 12781
+    },
+    {
+      "epoch": 0.12782,
+      "grad_norm": 1.2827369883028994,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 12782
+    },
+    {
+      "epoch": 0.12783,
+      "grad_norm": 1.2007328133659945,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 12783
+    },
+    {
+      "epoch": 0.12784,
+      "grad_norm": 1.2077859948776573,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 12784
+    },
+    {
+      "epoch": 0.12785,
+      "grad_norm": 1.1480481310658968,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 12785
+    },
+    {
+      "epoch": 0.12786,
+      "grad_norm": 1.2382022064067508,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 12786
+    },
+    {
+      "epoch": 0.12787,
+      "grad_norm": 0.9824667158119392,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 12787
+    },
+    {
+      "epoch": 0.12788,
+      "grad_norm": 1.1508760952959747,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 12788
+    },
+    {
+      "epoch": 0.12789,
+      "grad_norm": 1.1160820774172333,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 12789
+    },
+    {
+      "epoch": 0.1279,
+      "grad_norm": 1.2883683988031678,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 12790
+    },
+    {
+      "epoch": 0.12791,
+      "grad_norm": 1.333631695602577,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 12791
+    },
+    {
+      "epoch": 0.12792,
+      "grad_norm": 1.069325969160836,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 12792
+    },
+    {
+      "epoch": 0.12793,
+      "grad_norm": 1.3669166797853736,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 12793
+    },
+    {
+      "epoch": 0.12794,
+      "grad_norm": 1.1649933110212536,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 12794
+    },
+    {
+      "epoch": 0.12795,
+      "grad_norm": 1.330830664464028,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 12795
+    },
+    {
+      "epoch": 0.12796,
+      "grad_norm": 1.1422954086481265,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 12796
+    },
+    {
+      "epoch": 0.12797,
+      "grad_norm": 1.5008484587331534,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 12797
+    },
+    {
+      "epoch": 0.12798,
+      "grad_norm": 1.0499892932988404,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 12798
+    },
+    {
+      "epoch": 0.12799,
+      "grad_norm": 1.1323962859092773,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 12799
+    },
+    {
+      "epoch": 0.128,
+      "grad_norm": 1.0851630911927252,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 12800
+    },
+    {
+      "epoch": 0.12801,
+      "grad_norm": 1.5752077380787768,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 12801
+    },
+    {
+      "epoch": 0.12802,
+      "grad_norm": 1.1017740945533272,
+      "learning_rate": 0.003,
+      "loss": 4.0666,
+      "step": 12802
+    },
+    {
+      "epoch": 0.12803,
+      "grad_norm": 1.1667952025678203,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 12803
+    },
+    {
+      "epoch": 0.12804,
+      "grad_norm": 1.0195581846750794,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 12804
+    },
+    {
+      "epoch": 0.12805,
+      "grad_norm": 1.2081431236473223,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 12805
+    },
+    {
+      "epoch": 0.12806,
+      "grad_norm": 0.9719602406726462,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 12806
+    },
+    {
+      "epoch": 0.12807,
+      "grad_norm": 1.299116883426473,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 12807
+    },
+    {
+      "epoch": 0.12808,
+      "grad_norm": 0.9210572627915543,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 12808
+    },
+    {
+      "epoch": 0.12809,
+      "grad_norm": 1.0176288806302762,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 12809
+    },
+    {
+      "epoch": 0.1281,
+      "grad_norm": 1.179455454339618,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 12810
+    },
+    {
+      "epoch": 0.12811,
+      "grad_norm": 1.125011783055268,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 12811
+    },
+    {
+      "epoch": 0.12812,
+      "grad_norm": 1.2823431624618908,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 12812
+    },
+    {
+      "epoch": 0.12813,
+      "grad_norm": 1.0853685286518326,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 12813
+    },
+    {
+      "epoch": 0.12814,
+      "grad_norm": 1.3098030352639953,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 12814
+    },
+    {
+      "epoch": 0.12815,
+      "grad_norm": 1.4009011882707822,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 12815
+    },
+    {
+      "epoch": 0.12816,
+      "grad_norm": 1.0624113021821955,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 12816
+    },
+    {
+      "epoch": 0.12817,
+      "grad_norm": 1.5273471712819569,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 12817
+    },
+    {
+      "epoch": 0.12818,
+      "grad_norm": 1.1622392449946617,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 12818
+    },
+    {
+      "epoch": 0.12819,
+      "grad_norm": 1.0737180829548525,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 12819
+    },
+    {
+      "epoch": 0.1282,
+      "grad_norm": 1.5028275338727308,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 12820
+    },
+    {
+      "epoch": 0.12821,
+      "grad_norm": 0.9820781601781966,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 12821
+    },
+    {
+      "epoch": 0.12822,
+      "grad_norm": 1.189653346367063,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 12822
+    },
+    {
+      "epoch": 0.12823,
+      "grad_norm": 1.305750372775048,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 12823
+    },
+    {
+      "epoch": 0.12824,
+      "grad_norm": 1.1085853553524894,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 12824
+    },
+    {
+      "epoch": 0.12825,
+      "grad_norm": 1.3521052760748702,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 12825
+    },
+    {
+      "epoch": 0.12826,
+      "grad_norm": 1.1339361194151034,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 12826
+    },
+    {
+      "epoch": 0.12827,
+      "grad_norm": 1.2919587450515737,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 12827
+    },
+    {
+      "epoch": 0.12828,
+      "grad_norm": 1.1162216951237776,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 12828
+    },
+    {
+      "epoch": 0.12829,
+      "grad_norm": 1.1035553365879642,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 12829
+    },
+    {
+      "epoch": 0.1283,
+      "grad_norm": 1.418334221315941,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 12830
+    },
+    {
+      "epoch": 0.12831,
+      "grad_norm": 0.827587958149206,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 12831
+    },
+    {
+      "epoch": 0.12832,
+      "grad_norm": 0.9941792427885942,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 12832
+    },
+    {
+      "epoch": 0.12833,
+      "grad_norm": 1.3415460248468394,
+      "learning_rate": 0.003,
+      "loss": 4.0737,
+      "step": 12833
+    },
+    {
+      "epoch": 0.12834,
+      "grad_norm": 1.5454290749754083,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 12834
+    },
+    {
+      "epoch": 0.12835,
+      "grad_norm": 0.966935009632508,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 12835
+    },
+    {
+      "epoch": 0.12836,
+      "grad_norm": 1.3383965177659312,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 12836
+    },
+    {
+      "epoch": 0.12837,
+      "grad_norm": 1.214439584671967,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 12837
+    },
+    {
+      "epoch": 0.12838,
+      "grad_norm": 1.2522112846423572,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 12838
+    },
+    {
+      "epoch": 0.12839,
+      "grad_norm": 1.114227336414836,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 12839
+    },
+    {
+      "epoch": 0.1284,
+      "grad_norm": 1.3933317053817007,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 12840
+    },
+    {
+      "epoch": 0.12841,
+      "grad_norm": 0.9084725468732007,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 12841
+    },
+    {
+      "epoch": 0.12842,
+      "grad_norm": 1.3593545948818664,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 12842
+    },
+    {
+      "epoch": 0.12843,
+      "grad_norm": 1.05750875955245,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 12843
+    },
+    {
+      "epoch": 0.12844,
+      "grad_norm": 1.2623165134390104,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 12844
+    },
+    {
+      "epoch": 0.12845,
+      "grad_norm": 1.215934308443997,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 12845
+    },
+    {
+      "epoch": 0.12846,
+      "grad_norm": 1.2534991068204633,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 12846
+    },
+    {
+      "epoch": 0.12847,
+      "grad_norm": 1.2460794845808603,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 12847
+    },
+    {
+      "epoch": 0.12848,
+      "grad_norm": 1.1933751931734735,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 12848
+    },
+    {
+      "epoch": 0.12849,
+      "grad_norm": 1.166245408826738,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 12849
+    },
+    {
+      "epoch": 0.1285,
+      "grad_norm": 1.1464404870864988,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 12850
+    },
+    {
+      "epoch": 0.12851,
+      "grad_norm": 1.043965368615515,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 12851
+    },
+    {
+      "epoch": 0.12852,
+      "grad_norm": 1.6275648542340913,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 12852
+    },
+    {
+      "epoch": 0.12853,
+      "grad_norm": 0.9973713390567429,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 12853
+    },
+    {
+      "epoch": 0.12854,
+      "grad_norm": 1.3145129655582115,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 12854
+    },
+    {
+      "epoch": 0.12855,
+      "grad_norm": 0.9255561273968774,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 12855
+    },
+    {
+      "epoch": 0.12856,
+      "grad_norm": 1.123206468742272,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 12856
+    },
+    {
+      "epoch": 0.12857,
+      "grad_norm": 1.165459338103236,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 12857
+    },
+    {
+      "epoch": 0.12858,
+      "grad_norm": 1.1367968392625207,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 12858
+    },
+    {
+      "epoch": 0.12859,
+      "grad_norm": 0.9960241599180506,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 12859
+    },
+    {
+      "epoch": 0.1286,
+      "grad_norm": 1.3943574392367513,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 12860
+    },
+    {
+      "epoch": 0.12861,
+      "grad_norm": 1.0839718347871652,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 12861
+    },
+    {
+      "epoch": 0.12862,
+      "grad_norm": 1.450534960996423,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 12862
+    },
+    {
+      "epoch": 0.12863,
+      "grad_norm": 1.2124543848819562,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 12863
+    },
+    {
+      "epoch": 0.12864,
+      "grad_norm": 1.0730704444607297,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 12864
+    },
+    {
+      "epoch": 0.12865,
+      "grad_norm": 1.416501422991841,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 12865
+    },
+    {
+      "epoch": 0.12866,
+      "grad_norm": 1.2864051589898484,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 12866
+    },
+    {
+      "epoch": 0.12867,
+      "grad_norm": 1.3012900075342926,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 12867
+    },
+    {
+      "epoch": 0.12868,
+      "grad_norm": 1.2316271420711697,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 12868
+    },
+    {
+      "epoch": 0.12869,
+      "grad_norm": 1.0559148720019054,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 12869
+    },
+    {
+      "epoch": 0.1287,
+      "grad_norm": 1.2076505185344402,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 12870
+    },
+    {
+      "epoch": 0.12871,
+      "grad_norm": 1.0351257024278024,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 12871
+    },
+    {
+      "epoch": 0.12872,
+      "grad_norm": 1.4946297034159481,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 12872
+    },
+    {
+      "epoch": 0.12873,
+      "grad_norm": 1.0444761583654851,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 12873
+    },
+    {
+      "epoch": 0.12874,
+      "grad_norm": 1.4111365525307598,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 12874
+    },
+    {
+      "epoch": 0.12875,
+      "grad_norm": 0.92305991229768,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 12875
+    },
+    {
+      "epoch": 0.12876,
+      "grad_norm": 1.181940558244761,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 12876
+    },
+    {
+      "epoch": 0.12877,
+      "grad_norm": 1.3588258265079936,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 12877
+    },
+    {
+      "epoch": 0.12878,
+      "grad_norm": 1.1435193997564612,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 12878
+    },
+    {
+      "epoch": 0.12879,
+      "grad_norm": 1.1931087094596353,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 12879
+    },
+    {
+      "epoch": 0.1288,
+      "grad_norm": 1.0300788873313722,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 12880
+    },
+    {
+      "epoch": 0.12881,
+      "grad_norm": 1.2086016131275303,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 12881
+    },
+    {
+      "epoch": 0.12882,
+      "grad_norm": 1.0888737394809258,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 12882
+    },
+    {
+      "epoch": 0.12883,
+      "grad_norm": 1.2692961787043513,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 12883
+    },
+    {
+      "epoch": 0.12884,
+      "grad_norm": 0.906168836593753,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 12884
+    },
+    {
+      "epoch": 0.12885,
+      "grad_norm": 1.0714132168907917,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 12885
+    },
+    {
+      "epoch": 0.12886,
+      "grad_norm": 1.384801008569906,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 12886
+    },
+    {
+      "epoch": 0.12887,
+      "grad_norm": 1.0782291140817764,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 12887
+    },
+    {
+      "epoch": 0.12888,
+      "grad_norm": 1.2316065046066518,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 12888
+    },
+    {
+      "epoch": 0.12889,
+      "grad_norm": 1.2766952978758708,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 12889
+    },
+    {
+      "epoch": 0.1289,
+      "grad_norm": 1.14895904230218,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 12890
+    },
+    {
+      "epoch": 0.12891,
+      "grad_norm": 1.1935543230517358,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 12891
+    },
+    {
+      "epoch": 0.12892,
+      "grad_norm": 1.0722825731155168,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 12892
+    },
+    {
+      "epoch": 0.12893,
+      "grad_norm": 1.1006109032398521,
+      "learning_rate": 0.003,
+      "loss": 4.066,
+      "step": 12893
+    },
+    {
+      "epoch": 0.12894,
+      "grad_norm": 1.1575330038873581,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 12894
+    },
+    {
+      "epoch": 0.12895,
+      "grad_norm": 1.4045037649238294,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 12895
+    },
+    {
+      "epoch": 0.12896,
+      "grad_norm": 1.2226745359152975,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 12896
+    },
+    {
+      "epoch": 0.12897,
+      "grad_norm": 0.9810855290951486,
+      "learning_rate": 0.003,
+      "loss": 3.9741,
+      "step": 12897
+    },
+    {
+      "epoch": 0.12898,
+      "grad_norm": 1.4321113882827925,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 12898
+    },
+    {
+      "epoch": 0.12899,
+      "grad_norm": 0.9279563463843422,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 12899
+    },
+    {
+      "epoch": 0.129,
+      "grad_norm": 1.0112843463751415,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 12900
+    },
+    {
+      "epoch": 0.12901,
+      "grad_norm": 1.258312102527465,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 12901
+    },
+    {
+      "epoch": 0.12902,
+      "grad_norm": 1.2599438181290643,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 12902
+    },
+    {
+      "epoch": 0.12903,
+      "grad_norm": 1.3168883187604847,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 12903
+    },
+    {
+      "epoch": 0.12904,
+      "grad_norm": 1.158850205061437,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 12904
+    },
+    {
+      "epoch": 0.12905,
+      "grad_norm": 1.2534304093157755,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 12905
+    },
+    {
+      "epoch": 0.12906,
+      "grad_norm": 1.2279071281854934,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 12906
+    },
+    {
+      "epoch": 0.12907,
+      "grad_norm": 1.3135077430669198,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 12907
+    },
+    {
+      "epoch": 0.12908,
+      "grad_norm": 1.1340062936234097,
+      "learning_rate": 0.003,
+      "loss": 3.9859,
+      "step": 12908
+    },
+    {
+      "epoch": 0.12909,
+      "grad_norm": 1.2105010880042733,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 12909
+    },
+    {
+      "epoch": 0.1291,
+      "grad_norm": 1.035585642600207,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 12910
+    },
+    {
+      "epoch": 0.12911,
+      "grad_norm": 1.2677876907556864,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 12911
+    },
+    {
+      "epoch": 0.12912,
+      "grad_norm": 1.1098026814924011,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 12912
+    },
+    {
+      "epoch": 0.12913,
+      "grad_norm": 1.3247567294192373,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 12913
+    },
+    {
+      "epoch": 0.12914,
+      "grad_norm": 1.087353870991555,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 12914
+    },
+    {
+      "epoch": 0.12915,
+      "grad_norm": 1.3235097791685535,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 12915
+    },
+    {
+      "epoch": 0.12916,
+      "grad_norm": 1.0418896283143477,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 12916
+    },
+    {
+      "epoch": 0.12917,
+      "grad_norm": 1.430243661406629,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 12917
+    },
+    {
+      "epoch": 0.12918,
+      "grad_norm": 1.244820526694616,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 12918
+    },
+    {
+      "epoch": 0.12919,
+      "grad_norm": 1.0491507724949716,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 12919
+    },
+    {
+      "epoch": 0.1292,
+      "grad_norm": 1.7564519294617458,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 12920
+    },
+    {
+      "epoch": 0.12921,
+      "grad_norm": 0.9365160750153148,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 12921
+    },
+    {
+      "epoch": 0.12922,
+      "grad_norm": 1.2570769167876839,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 12922
+    },
+    {
+      "epoch": 0.12923,
+      "grad_norm": 1.1340761136316975,
+      "learning_rate": 0.003,
+      "loss": 3.9872,
+      "step": 12923
+    },
+    {
+      "epoch": 0.12924,
+      "grad_norm": 1.1839216294414823,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 12924
+    },
+    {
+      "epoch": 0.12925,
+      "grad_norm": 1.2177068177432462,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 12925
+    },
+    {
+      "epoch": 0.12926,
+      "grad_norm": 1.3158018270836227,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 12926
+    },
+    {
+      "epoch": 0.12927,
+      "grad_norm": 1.0917985819220148,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 12927
+    },
+    {
+      "epoch": 0.12928,
+      "grad_norm": 1.2406607657891981,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 12928
+    },
+    {
+      "epoch": 0.12929,
+      "grad_norm": 1.2452029167820236,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 12929
+    },
+    {
+      "epoch": 0.1293,
+      "grad_norm": 1.0704826750375576,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 12930
+    },
+    {
+      "epoch": 0.12931,
+      "grad_norm": 1.1059105445383641,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 12931
+    },
+    {
+      "epoch": 0.12932,
+      "grad_norm": 1.2032685945299755,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 12932
+    },
+    {
+      "epoch": 0.12933,
+      "grad_norm": 1.139197150088746,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 12933
+    },
+    {
+      "epoch": 0.12934,
+      "grad_norm": 1.1274527096083895,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 12934
+    },
+    {
+      "epoch": 0.12935,
+      "grad_norm": 1.507321804546948,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 12935
+    },
+    {
+      "epoch": 0.12936,
+      "grad_norm": 0.975323122122746,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 12936
+    },
+    {
+      "epoch": 0.12937,
+      "grad_norm": 1.3623939109449617,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 12937
+    },
+    {
+      "epoch": 0.12938,
+      "grad_norm": 1.057288137364664,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 12938
+    },
+    {
+      "epoch": 0.12939,
+      "grad_norm": 1.3259203102570047,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 12939
+    },
+    {
+      "epoch": 0.1294,
+      "grad_norm": 1.110701965074797,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 12940
+    },
+    {
+      "epoch": 0.12941,
+      "grad_norm": 1.287210392735098,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 12941
+    },
+    {
+      "epoch": 0.12942,
+      "grad_norm": 1.1518747016361184,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 12942
+    },
+    {
+      "epoch": 0.12943,
+      "grad_norm": 1.1900719699662106,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 12943
+    },
+    {
+      "epoch": 0.12944,
+      "grad_norm": 1.154771928089284,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 12944
+    },
+    {
+      "epoch": 0.12945,
+      "grad_norm": 1.1482591004768885,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 12945
+    },
+    {
+      "epoch": 0.12946,
+      "grad_norm": 1.0932439918983072,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 12946
+    },
+    {
+      "epoch": 0.12947,
+      "grad_norm": 1.2097249296318278,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 12947
+    },
+    {
+      "epoch": 0.12948,
+      "grad_norm": 1.2986622397477905,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 12948
+    },
+    {
+      "epoch": 0.12949,
+      "grad_norm": 1.3825525145439725,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 12949
+    },
+    {
+      "epoch": 0.1295,
+      "grad_norm": 1.1758601196064937,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 12950
+    },
+    {
+      "epoch": 0.12951,
+      "grad_norm": 1.3591353151221124,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 12951
+    },
+    {
+      "epoch": 0.12952,
+      "grad_norm": 0.9131650280317642,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 12952
+    },
+    {
+      "epoch": 0.12953,
+      "grad_norm": 1.1014974217693525,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 12953
+    },
+    {
+      "epoch": 0.12954,
+      "grad_norm": 1.3026270117165117,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 12954
+    },
+    {
+      "epoch": 0.12955,
+      "grad_norm": 1.2136899689336957,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 12955
+    },
+    {
+      "epoch": 0.12956,
+      "grad_norm": 1.1081746289222236,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 12956
+    },
+    {
+      "epoch": 0.12957,
+      "grad_norm": 1.3558413742243949,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 12957
+    },
+    {
+      "epoch": 0.12958,
+      "grad_norm": 1.2985873728581943,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 12958
+    },
+    {
+      "epoch": 0.12959,
+      "grad_norm": 1.0915415838285472,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 12959
+    },
+    {
+      "epoch": 0.1296,
+      "grad_norm": 1.2271488870813643,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 12960
+    },
+    {
+      "epoch": 0.12961,
+      "grad_norm": 1.0660449140643198,
+      "learning_rate": 0.003,
+      "loss": 4.0498,
+      "step": 12961
+    },
+    {
+      "epoch": 0.12962,
+      "grad_norm": 1.2713070418720303,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 12962
+    },
+    {
+      "epoch": 0.12963,
+      "grad_norm": 1.1140722977923936,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 12963
+    },
+    {
+      "epoch": 0.12964,
+      "grad_norm": 1.4334928816984343,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 12964
+    },
+    {
+      "epoch": 0.12965,
+      "grad_norm": 1.0798644543572762,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 12965
+    },
+    {
+      "epoch": 0.12966,
+      "grad_norm": 1.3576702711558502,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 12966
+    },
+    {
+      "epoch": 0.12967,
+      "grad_norm": 0.9565988586229004,
+      "learning_rate": 0.003,
+      "loss": 3.9703,
+      "step": 12967
+    },
+    {
+      "epoch": 0.12968,
+      "grad_norm": 1.356676054202947,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 12968
+    },
+    {
+      "epoch": 0.12969,
+      "grad_norm": 1.1836635707869203,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 12969
+    },
+    {
+      "epoch": 0.1297,
+      "grad_norm": 1.1187549800265795,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 12970
+    },
+    {
+      "epoch": 0.12971,
+      "grad_norm": 1.1920675764967605,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 12971
+    },
+    {
+      "epoch": 0.12972,
+      "grad_norm": 1.1969678957485745,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 12972
+    },
+    {
+      "epoch": 0.12973,
+      "grad_norm": 1.1830367876527692,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 12973
+    },
+    {
+      "epoch": 0.12974,
+      "grad_norm": 1.094989770680786,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 12974
+    },
+    {
+      "epoch": 0.12975,
+      "grad_norm": 1.2852262825752716,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 12975
+    },
+    {
+      "epoch": 0.12976,
+      "grad_norm": 1.2513488888113358,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 12976
+    },
+    {
+      "epoch": 0.12977,
+      "grad_norm": 1.160455662683476,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 12977
+    },
+    {
+      "epoch": 0.12978,
+      "grad_norm": 1.1863743123144554,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 12978
+    },
+    {
+      "epoch": 0.12979,
+      "grad_norm": 0.9523514728095331,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 12979
+    },
+    {
+      "epoch": 0.1298,
+      "grad_norm": 1.2746725183316685,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 12980
+    },
+    {
+      "epoch": 0.12981,
+      "grad_norm": 1.2113630031469498,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 12981
+    },
+    {
+      "epoch": 0.12982,
+      "grad_norm": 1.4401568049490077,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 12982
+    },
+    {
+      "epoch": 0.12983,
+      "grad_norm": 1.0589139916757784,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 12983
+    },
+    {
+      "epoch": 0.12984,
+      "grad_norm": 1.3111421740836104,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 12984
+    },
+    {
+      "epoch": 0.12985,
+      "grad_norm": 1.0916263279984209,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 12985
+    },
+    {
+      "epoch": 0.12986,
+      "grad_norm": 1.3150320490843068,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 12986
+    },
+    {
+      "epoch": 0.12987,
+      "grad_norm": 1.1622979768305781,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 12987
+    },
+    {
+      "epoch": 0.12988,
+      "grad_norm": 1.1795490087421945,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 12988
+    },
+    {
+      "epoch": 0.12989,
+      "grad_norm": 1.2023289153396604,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 12989
+    },
+    {
+      "epoch": 0.1299,
+      "grad_norm": 1.2904015849325297,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 12990
+    },
+    {
+      "epoch": 0.12991,
+      "grad_norm": 1.1788631395030478,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 12991
+    },
+    {
+      "epoch": 0.12992,
+      "grad_norm": 1.1325099422571487,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 12992
+    },
+    {
+      "epoch": 0.12993,
+      "grad_norm": 1.2285007962804115,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 12993
+    },
+    {
+      "epoch": 0.12994,
+      "grad_norm": 1.1110998831328704,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 12994
+    },
+    {
+      "epoch": 0.12995,
+      "grad_norm": 1.345079656718258,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 12995
+    },
+    {
+      "epoch": 0.12996,
+      "grad_norm": 1.3307329461460866,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 12996
+    },
+    {
+      "epoch": 0.12997,
+      "grad_norm": 1.1226943235816225,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 12997
+    },
+    {
+      "epoch": 0.12998,
+      "grad_norm": 1.3592512691421177,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 12998
+    },
+    {
+      "epoch": 0.12999,
+      "grad_norm": 1.0450028186646942,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 12999
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 1.1867332569892648,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 13000
+    },
+    {
+      "epoch": 0.13001,
+      "grad_norm": 1.23639273204252,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 13001
+    },
+    {
+      "epoch": 0.13002,
+      "grad_norm": 1.1831666531849891,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 13002
+    },
+    {
+      "epoch": 0.13003,
+      "grad_norm": 1.2823332562222987,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 13003
+    },
+    {
+      "epoch": 0.13004,
+      "grad_norm": 1.191483777431203,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 13004
+    },
+    {
+      "epoch": 0.13005,
+      "grad_norm": 1.1804479129189784,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 13005
+    },
+    {
+      "epoch": 0.13006,
+      "grad_norm": 1.0660348758813278,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 13006
+    },
+    {
+      "epoch": 0.13007,
+      "grad_norm": 1.1998590835283771,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 13007
+    },
+    {
+      "epoch": 0.13008,
+      "grad_norm": 1.211992067571615,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 13008
+    },
+    {
+      "epoch": 0.13009,
+      "grad_norm": 1.0715092102241917,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 13009
+    },
+    {
+      "epoch": 0.1301,
+      "grad_norm": 1.2311289310569815,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 13010
+    },
+    {
+      "epoch": 0.13011,
+      "grad_norm": 1.1482130599826008,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 13011
+    },
+    {
+      "epoch": 0.13012,
+      "grad_norm": 1.419970438601161,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 13012
+    },
+    {
+      "epoch": 0.13013,
+      "grad_norm": 1.289807647099006,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 13013
+    },
+    {
+      "epoch": 0.13014,
+      "grad_norm": 1.3265352693337469,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 13014
+    },
+    {
+      "epoch": 0.13015,
+      "grad_norm": 1.3345475639012292,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 13015
+    },
+    {
+      "epoch": 0.13016,
+      "grad_norm": 1.0368363723912866,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 13016
+    },
+    {
+      "epoch": 0.13017,
+      "grad_norm": 1.2877451811752048,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 13017
+    },
+    {
+      "epoch": 0.13018,
+      "grad_norm": 1.012908939390002,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 13018
+    },
+    {
+      "epoch": 0.13019,
+      "grad_norm": 1.3703226825238353,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 13019
+    },
+    {
+      "epoch": 0.1302,
+      "grad_norm": 1.176604438017887,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 13020
+    },
+    {
+      "epoch": 0.13021,
+      "grad_norm": 1.1122871690777643,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 13021
+    },
+    {
+      "epoch": 0.13022,
+      "grad_norm": 1.2974145454477173,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 13022
+    },
+    {
+      "epoch": 0.13023,
+      "grad_norm": 1.2352521460749524,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 13023
+    },
+    {
+      "epoch": 0.13024,
+      "grad_norm": 1.168606954054629,
+      "learning_rate": 0.003,
+      "loss": 3.9779,
+      "step": 13024
+    },
+    {
+      "epoch": 0.13025,
+      "grad_norm": 1.2499960934819396,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 13025
+    },
+    {
+      "epoch": 0.13026,
+      "grad_norm": 1.3152707080427926,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 13026
+    },
+    {
+      "epoch": 0.13027,
+      "grad_norm": 1.1778120535765901,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 13027
+    },
+    {
+      "epoch": 0.13028,
+      "grad_norm": 1.2079691050764194,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 13028
+    },
+    {
+      "epoch": 0.13029,
+      "grad_norm": 1.1071644576188255,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 13029
+    },
+    {
+      "epoch": 0.1303,
+      "grad_norm": 1.3864312407201933,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 13030
+    },
+    {
+      "epoch": 0.13031,
+      "grad_norm": 1.1125770889548465,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 13031
+    },
+    {
+      "epoch": 0.13032,
+      "grad_norm": 1.3413211527215434,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 13032
+    },
+    {
+      "epoch": 0.13033,
+      "grad_norm": 1.06880186736233,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 13033
+    },
+    {
+      "epoch": 0.13034,
+      "grad_norm": 1.2728892725094454,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 13034
+    },
+    {
+      "epoch": 0.13035,
+      "grad_norm": 1.0651150503240214,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 13035
+    },
+    {
+      "epoch": 0.13036,
+      "grad_norm": 1.1830519127271366,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 13036
+    },
+    {
+      "epoch": 0.13037,
+      "grad_norm": 1.1141922529152608,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 13037
+    },
+    {
+      "epoch": 0.13038,
+      "grad_norm": 1.3343469496900424,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 13038
+    },
+    {
+      "epoch": 0.13039,
+      "grad_norm": 1.134356082858109,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 13039
+    },
+    {
+      "epoch": 0.1304,
+      "grad_norm": 1.0985262524667785,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 13040
+    },
+    {
+      "epoch": 0.13041,
+      "grad_norm": 1.4877606344783099,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 13041
+    },
+    {
+      "epoch": 0.13042,
+      "grad_norm": 1.1653508801524972,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 13042
+    },
+    {
+      "epoch": 0.13043,
+      "grad_norm": 1.1754164423686313,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 13043
+    },
+    {
+      "epoch": 0.13044,
+      "grad_norm": 1.3302602473205494,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 13044
+    },
+    {
+      "epoch": 0.13045,
+      "grad_norm": 1.0904545751149235,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 13045
+    },
+    {
+      "epoch": 0.13046,
+      "grad_norm": 1.3172464854441723,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 13046
+    },
+    {
+      "epoch": 0.13047,
+      "grad_norm": 1.00886913655674,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 13047
+    },
+    {
+      "epoch": 0.13048,
+      "grad_norm": 1.244453787403923,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 13048
+    },
+    {
+      "epoch": 0.13049,
+      "grad_norm": 0.9503210626593559,
+      "learning_rate": 0.003,
+      "loss": 3.9859,
+      "step": 13049
+    },
+    {
+      "epoch": 0.1305,
+      "grad_norm": 1.1082209982210707,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 13050
+    },
+    {
+      "epoch": 0.13051,
+      "grad_norm": 1.3825316783754058,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 13051
+    },
+    {
+      "epoch": 0.13052,
+      "grad_norm": 1.1065415073757985,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 13052
+    },
+    {
+      "epoch": 0.13053,
+      "grad_norm": 1.2282454715331608,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 13053
+    },
+    {
+      "epoch": 0.13054,
+      "grad_norm": 1.2066879253376723,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 13054
+    },
+    {
+      "epoch": 0.13055,
+      "grad_norm": 0.9873407819705948,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 13055
+    },
+    {
+      "epoch": 0.13056,
+      "grad_norm": 1.3135267445481453,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 13056
+    },
+    {
+      "epoch": 0.13057,
+      "grad_norm": 1.0202575371173688,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 13057
+    },
+    {
+      "epoch": 0.13058,
+      "grad_norm": 1.331106710918081,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 13058
+    },
+    {
+      "epoch": 0.13059,
+      "grad_norm": 1.1490975344521086,
+      "learning_rate": 0.003,
+      "loss": 4.0537,
+      "step": 13059
+    },
+    {
+      "epoch": 0.1306,
+      "grad_norm": 1.3554628358227565,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 13060
+    },
+    {
+      "epoch": 0.13061,
+      "grad_norm": 1.0613734741931096,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 13061
+    },
+    {
+      "epoch": 0.13062,
+      "grad_norm": 1.1786998069131467,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 13062
+    },
+    {
+      "epoch": 0.13063,
+      "grad_norm": 1.122364934303279,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 13063
+    },
+    {
+      "epoch": 0.13064,
+      "grad_norm": 1.245717571216681,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 13064
+    },
+    {
+      "epoch": 0.13065,
+      "grad_norm": 1.2300096300862102,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 13065
+    },
+    {
+      "epoch": 0.13066,
+      "grad_norm": 1.177243977511725,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 13066
+    },
+    {
+      "epoch": 0.13067,
+      "grad_norm": 1.121267875399439,
+      "learning_rate": 0.003,
+      "loss": 4.0695,
+      "step": 13067
+    },
+    {
+      "epoch": 0.13068,
+      "grad_norm": 1.4458148439390073,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 13068
+    },
+    {
+      "epoch": 0.13069,
+      "grad_norm": 1.0655288352193661,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 13069
+    },
+    {
+      "epoch": 0.1307,
+      "grad_norm": 1.4673778899232879,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 13070
+    },
+    {
+      "epoch": 0.13071,
+      "grad_norm": 1.0077870922762957,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 13071
+    },
+    {
+      "epoch": 0.13072,
+      "grad_norm": 1.2477720049192509,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 13072
+    },
+    {
+      "epoch": 0.13073,
+      "grad_norm": 1.2295747164214397,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 13073
+    },
+    {
+      "epoch": 0.13074,
+      "grad_norm": 1.2677829946765515,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 13074
+    },
+    {
+      "epoch": 0.13075,
+      "grad_norm": 1.3998043440354033,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 13075
+    },
+    {
+      "epoch": 0.13076,
+      "grad_norm": 1.0540236803053258,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 13076
+    },
+    {
+      "epoch": 0.13077,
+      "grad_norm": 1.2607216048499503,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 13077
+    },
+    {
+      "epoch": 0.13078,
+      "grad_norm": 1.0932345623866864,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 13078
+    },
+    {
+      "epoch": 0.13079,
+      "grad_norm": 1.2051592865391518,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 13079
+    },
+    {
+      "epoch": 0.1308,
+      "grad_norm": 1.0491942575755948,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 13080
+    },
+    {
+      "epoch": 0.13081,
+      "grad_norm": 1.3062916574774783,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 13081
+    },
+    {
+      "epoch": 0.13082,
+      "grad_norm": 1.1321098357352237,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 13082
+    },
+    {
+      "epoch": 0.13083,
+      "grad_norm": 1.3665990418676934,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 13083
+    },
+    {
+      "epoch": 0.13084,
+      "grad_norm": 0.9978809201908994,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 13084
+    },
+    {
+      "epoch": 0.13085,
+      "grad_norm": 1.373453319634236,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 13085
+    },
+    {
+      "epoch": 0.13086,
+      "grad_norm": 0.8948839986805347,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 13086
+    },
+    {
+      "epoch": 0.13087,
+      "grad_norm": 1.1553678542467876,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 13087
+    },
+    {
+      "epoch": 0.13088,
+      "grad_norm": 1.228440641943863,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 13088
+    },
+    {
+      "epoch": 0.13089,
+      "grad_norm": 1.3675739443552888,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 13089
+    },
+    {
+      "epoch": 0.1309,
+      "grad_norm": 1.0849909250003047,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 13090
+    },
+    {
+      "epoch": 0.13091,
+      "grad_norm": 1.2724323504464439,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 13091
+    },
+    {
+      "epoch": 0.13092,
+      "grad_norm": 1.1154728415628237,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 13092
+    },
+    {
+      "epoch": 0.13093,
+      "grad_norm": 1.3266592579188943,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 13093
+    },
+    {
+      "epoch": 0.13094,
+      "grad_norm": 1.0431979252943413,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 13094
+    },
+    {
+      "epoch": 0.13095,
+      "grad_norm": 1.2587443347677894,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 13095
+    },
+    {
+      "epoch": 0.13096,
+      "grad_norm": 1.0518579515346653,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 13096
+    },
+    {
+      "epoch": 0.13097,
+      "grad_norm": 1.182379232046843,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 13097
+    },
+    {
+      "epoch": 0.13098,
+      "grad_norm": 1.2441723313218485,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 13098
+    },
+    {
+      "epoch": 0.13099,
+      "grad_norm": 1.1745868481759494,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 13099
+    },
+    {
+      "epoch": 0.131,
+      "grad_norm": 1.2466331476420034,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 13100
+    },
+    {
+      "epoch": 0.13101,
+      "grad_norm": 1.111399149137611,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 13101
+    },
+    {
+      "epoch": 0.13102,
+      "grad_norm": 1.3801332479480273,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 13102
+    },
+    {
+      "epoch": 0.13103,
+      "grad_norm": 1.0319688794090096,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 13103
+    },
+    {
+      "epoch": 0.13104,
+      "grad_norm": 1.352104538176443,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 13104
+    },
+    {
+      "epoch": 0.13105,
+      "grad_norm": 0.9840982994262835,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 13105
+    },
+    {
+      "epoch": 0.13106,
+      "grad_norm": 1.2720508957564811,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 13106
+    },
+    {
+      "epoch": 0.13107,
+      "grad_norm": 1.3582344819108931,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 13107
+    },
+    {
+      "epoch": 0.13108,
+      "grad_norm": 1.263872749869849,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 13108
+    },
+    {
+      "epoch": 0.13109,
+      "grad_norm": 1.3146328515584427,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 13109
+    },
+    {
+      "epoch": 0.1311,
+      "grad_norm": 1.1142588293129503,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 13110
+    },
+    {
+      "epoch": 0.13111,
+      "grad_norm": 1.2948103531876165,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 13111
+    },
+    {
+      "epoch": 0.13112,
+      "grad_norm": 1.2131004643968781,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 13112
+    },
+    {
+      "epoch": 0.13113,
+      "grad_norm": 1.10642883210753,
+      "learning_rate": 0.003,
+      "loss": 4.0505,
+      "step": 13113
+    },
+    {
+      "epoch": 0.13114,
+      "grad_norm": 1.2028397006048634,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 13114
+    },
+    {
+      "epoch": 0.13115,
+      "grad_norm": 0.9699969253696146,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 13115
+    },
+    {
+      "epoch": 0.13116,
+      "grad_norm": 1.244963431503124,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 13116
+    },
+    {
+      "epoch": 0.13117,
+      "grad_norm": 1.0532682402073905,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 13117
+    },
+    {
+      "epoch": 0.13118,
+      "grad_norm": 1.1743842407713163,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 13118
+    },
+    {
+      "epoch": 0.13119,
+      "grad_norm": 1.1241278412152182,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 13119
+    },
+    {
+      "epoch": 0.1312,
+      "grad_norm": 1.2176277731495335,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 13120
+    },
+    {
+      "epoch": 0.13121,
+      "grad_norm": 1.2384334922856706,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 13121
+    },
+    {
+      "epoch": 0.13122,
+      "grad_norm": 1.3559900193532226,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 13122
+    },
+    {
+      "epoch": 0.13123,
+      "grad_norm": 1.0763654633297999,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 13123
+    },
+    {
+      "epoch": 0.13124,
+      "grad_norm": 1.1916683197228783,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 13124
+    },
+    {
+      "epoch": 0.13125,
+      "grad_norm": 1.227531132739174,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 13125
+    },
+    {
+      "epoch": 0.13126,
+      "grad_norm": 1.0960208761266186,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 13126
+    },
+    {
+      "epoch": 0.13127,
+      "grad_norm": 1.2337908088108465,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 13127
+    },
+    {
+      "epoch": 0.13128,
+      "grad_norm": 1.1628289901082414,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 13128
+    },
+    {
+      "epoch": 0.13129,
+      "grad_norm": 1.6903240617545117,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 13129
+    },
+    {
+      "epoch": 0.1313,
+      "grad_norm": 1.0718491893109336,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 13130
+    },
+    {
+      "epoch": 0.13131,
+      "grad_norm": 1.389295634459969,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 13131
+    },
+    {
+      "epoch": 0.13132,
+      "grad_norm": 0.9675178513023274,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 13132
+    },
+    {
+      "epoch": 0.13133,
+      "grad_norm": 1.2463845794263217,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 13133
+    },
+    {
+      "epoch": 0.13134,
+      "grad_norm": 1.303025274762317,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 13134
+    },
+    {
+      "epoch": 0.13135,
+      "grad_norm": 1.1598289958201813,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 13135
+    },
+    {
+      "epoch": 0.13136,
+      "grad_norm": 1.3105925553909337,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 13136
+    },
+    {
+      "epoch": 0.13137,
+      "grad_norm": 1.1781013394534319,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 13137
+    },
+    {
+      "epoch": 0.13138,
+      "grad_norm": 1.1794371671889319,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 13138
+    },
+    {
+      "epoch": 0.13139,
+      "grad_norm": 1.2986139091265898,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 13139
+    },
+    {
+      "epoch": 0.1314,
+      "grad_norm": 0.989722674097084,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 13140
+    },
+    {
+      "epoch": 0.13141,
+      "grad_norm": 1.3343673770114424,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 13141
+    },
+    {
+      "epoch": 0.13142,
+      "grad_norm": 1.1470785700967723,
+      "learning_rate": 0.003,
+      "loss": 4.0561,
+      "step": 13142
+    },
+    {
+      "epoch": 0.13143,
+      "grad_norm": 1.1589060396306303,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 13143
+    },
+    {
+      "epoch": 0.13144,
+      "grad_norm": 1.0582465386323356,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 13144
+    },
+    {
+      "epoch": 0.13145,
+      "grad_norm": 1.3672232216600506,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 13145
+    },
+    {
+      "epoch": 0.13146,
+      "grad_norm": 1.1879858947806368,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 13146
+    },
+    {
+      "epoch": 0.13147,
+      "grad_norm": 1.20843180027465,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 13147
+    },
+    {
+      "epoch": 0.13148,
+      "grad_norm": 1.3432584402500107,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 13148
+    },
+    {
+      "epoch": 0.13149,
+      "grad_norm": 0.9095709723571204,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 13149
+    },
+    {
+      "epoch": 0.1315,
+      "grad_norm": 1.057646553778702,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 13150
+    },
+    {
+      "epoch": 0.13151,
+      "grad_norm": 1.3610467106606845,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 13151
+    },
+    {
+      "epoch": 0.13152,
+      "grad_norm": 1.0777210898069285,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 13152
+    },
+    {
+      "epoch": 0.13153,
+      "grad_norm": 1.3859619564262946,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 13153
+    },
+    {
+      "epoch": 0.13154,
+      "grad_norm": 1.0844304505512883,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 13154
+    },
+    {
+      "epoch": 0.13155,
+      "grad_norm": 1.2359660173968072,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 13155
+    },
+    {
+      "epoch": 0.13156,
+      "grad_norm": 1.3677030719934207,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 13156
+    },
+    {
+      "epoch": 0.13157,
+      "grad_norm": 0.9833401342037275,
+      "learning_rate": 0.003,
+      "loss": 4.0597,
+      "step": 13157
+    },
+    {
+      "epoch": 0.13158,
+      "grad_norm": 1.1062735484981285,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 13158
+    },
+    {
+      "epoch": 0.13159,
+      "grad_norm": 1.1254248542885985,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 13159
+    },
+    {
+      "epoch": 0.1316,
+      "grad_norm": 1.1120092920802103,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 13160
+    },
+    {
+      "epoch": 0.13161,
+      "grad_norm": 1.1989205083321814,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 13161
+    },
+    {
+      "epoch": 0.13162,
+      "grad_norm": 1.1928171487306694,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 13162
+    },
+    {
+      "epoch": 0.13163,
+      "grad_norm": 1.4081902393551464,
+      "learning_rate": 0.003,
+      "loss": 4.0678,
+      "step": 13163
+    },
+    {
+      "epoch": 0.13164,
+      "grad_norm": 1.1583999173018455,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 13164
+    },
+    {
+      "epoch": 0.13165,
+      "grad_norm": 1.3293599987195368,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 13165
+    },
+    {
+      "epoch": 0.13166,
+      "grad_norm": 1.042849775568771,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 13166
+    },
+    {
+      "epoch": 0.13167,
+      "grad_norm": 1.179654456448683,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 13167
+    },
+    {
+      "epoch": 0.13168,
+      "grad_norm": 1.0543994115678994,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 13168
+    },
+    {
+      "epoch": 0.13169,
+      "grad_norm": 1.3091653230611913,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 13169
+    },
+    {
+      "epoch": 0.1317,
+      "grad_norm": 1.1789551231881399,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 13170
+    },
+    {
+      "epoch": 0.13171,
+      "grad_norm": 1.2679235757864307,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 13171
+    },
+    {
+      "epoch": 0.13172,
+      "grad_norm": 1.0121238818707832,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 13172
+    },
+    {
+      "epoch": 0.13173,
+      "grad_norm": 1.3674161299963243,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 13173
+    },
+    {
+      "epoch": 0.13174,
+      "grad_norm": 1.1333333512368313,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 13174
+    },
+    {
+      "epoch": 0.13175,
+      "grad_norm": 1.263735157123079,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 13175
+    },
+    {
+      "epoch": 0.13176,
+      "grad_norm": 1.4329249743192631,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 13176
+    },
+    {
+      "epoch": 0.13177,
+      "grad_norm": 1.113757810778901,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 13177
+    },
+    {
+      "epoch": 0.13178,
+      "grad_norm": 1.2033659807687154,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 13178
+    },
+    {
+      "epoch": 0.13179,
+      "grad_norm": 1.1629366378785435,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 13179
+    },
+    {
+      "epoch": 0.1318,
+      "grad_norm": 1.3602821104802627,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 13180
+    },
+    {
+      "epoch": 0.13181,
+      "grad_norm": 1.2445246941668198,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 13181
+    },
+    {
+      "epoch": 0.13182,
+      "grad_norm": 1.337209314159866,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 13182
+    },
+    {
+      "epoch": 0.13183,
+      "grad_norm": 0.9878572274233712,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 13183
+    },
+    {
+      "epoch": 0.13184,
+      "grad_norm": 1.0960981251343198,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 13184
+    },
+    {
+      "epoch": 0.13185,
+      "grad_norm": 1.3102949978898657,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 13185
+    },
+    {
+      "epoch": 0.13186,
+      "grad_norm": 1.205314079023528,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 13186
+    },
+    {
+      "epoch": 0.13187,
+      "grad_norm": 1.0255720665353423,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 13187
+    },
+    {
+      "epoch": 0.13188,
+      "grad_norm": 1.3262167257541027,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 13188
+    },
+    {
+      "epoch": 0.13189,
+      "grad_norm": 1.2168641109639486,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 13189
+    },
+    {
+      "epoch": 0.1319,
+      "grad_norm": 1.1889265011218615,
+      "learning_rate": 0.003,
+      "loss": 4.0534,
+      "step": 13190
+    },
+    {
+      "epoch": 0.13191,
+      "grad_norm": 1.3805581899749395,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 13191
+    },
+    {
+      "epoch": 0.13192,
+      "grad_norm": 1.0413867655514306,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 13192
+    },
+    {
+      "epoch": 0.13193,
+      "grad_norm": 1.2492107648050148,
+      "learning_rate": 0.003,
+      "loss": 3.9758,
+      "step": 13193
+    },
+    {
+      "epoch": 0.13194,
+      "grad_norm": 1.0030085267089808,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 13194
+    },
+    {
+      "epoch": 0.13195,
+      "grad_norm": 1.3532728033233967,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 13195
+    },
+    {
+      "epoch": 0.13196,
+      "grad_norm": 1.3442955676133201,
+      "learning_rate": 0.003,
+      "loss": 3.9847,
+      "step": 13196
+    },
+    {
+      "epoch": 0.13197,
+      "grad_norm": 1.1839299552626144,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 13197
+    },
+    {
+      "epoch": 0.13198,
+      "grad_norm": 1.0302625638882434,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 13198
+    },
+    {
+      "epoch": 0.13199,
+      "grad_norm": 1.2400658727503961,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 13199
+    },
+    {
+      "epoch": 0.132,
+      "grad_norm": 1.1317920385131712,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 13200
+    },
+    {
+      "epoch": 0.13201,
+      "grad_norm": 1.3070532346117103,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 13201
+    },
+    {
+      "epoch": 0.13202,
+      "grad_norm": 1.290994390815391,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 13202
+    },
+    {
+      "epoch": 0.13203,
+      "grad_norm": 1.3259193241549494,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 13203
+    },
+    {
+      "epoch": 0.13204,
+      "grad_norm": 1.1783424720493545,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 13204
+    },
+    {
+      "epoch": 0.13205,
+      "grad_norm": 1.170009083173674,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 13205
+    },
+    {
+      "epoch": 0.13206,
+      "grad_norm": 1.423622540381494,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 13206
+    },
+    {
+      "epoch": 0.13207,
+      "grad_norm": 1.0283146612527605,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 13207
+    },
+    {
+      "epoch": 0.13208,
+      "grad_norm": 1.2998154192550682,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 13208
+    },
+    {
+      "epoch": 0.13209,
+      "grad_norm": 0.873572889486228,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 13209
+    },
+    {
+      "epoch": 0.1321,
+      "grad_norm": 1.2389240668521508,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 13210
+    },
+    {
+      "epoch": 0.13211,
+      "grad_norm": 1.5130364765500839,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 13211
+    },
+    {
+      "epoch": 0.13212,
+      "grad_norm": 1.3577792356335452,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 13212
+    },
+    {
+      "epoch": 0.13213,
+      "grad_norm": 0.8591220566534142,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 13213
+    },
+    {
+      "epoch": 0.13214,
+      "grad_norm": 0.9734006922172769,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 13214
+    },
+    {
+      "epoch": 0.13215,
+      "grad_norm": 1.3037427579729166,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 13215
+    },
+    {
+      "epoch": 0.13216,
+      "grad_norm": 1.0660492545368943,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 13216
+    },
+    {
+      "epoch": 0.13217,
+      "grad_norm": 1.3276671787509555,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 13217
+    },
+    {
+      "epoch": 0.13218,
+      "grad_norm": 0.9912861724784381,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 13218
+    },
+    {
+      "epoch": 0.13219,
+      "grad_norm": 1.1947328423567711,
+      "learning_rate": 0.003,
+      "loss": 4.0734,
+      "step": 13219
+    },
+    {
+      "epoch": 0.1322,
+      "grad_norm": 1.1324627330357286,
+      "learning_rate": 0.003,
+      "loss": 4.0582,
+      "step": 13220
+    },
+    {
+      "epoch": 0.13221,
+      "grad_norm": 1.2736609006663895,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 13221
+    },
+    {
+      "epoch": 0.13222,
+      "grad_norm": 1.2523335140850878,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 13222
+    },
+    {
+      "epoch": 0.13223,
+      "grad_norm": 1.125126287078147,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 13223
+    },
+    {
+      "epoch": 0.13224,
+      "grad_norm": 1.3849418026736036,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 13224
+    },
+    {
+      "epoch": 0.13225,
+      "grad_norm": 1.1461342791536524,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 13225
+    },
+    {
+      "epoch": 0.13226,
+      "grad_norm": 1.4180290204833317,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 13226
+    },
+    {
+      "epoch": 0.13227,
+      "grad_norm": 1.014686375750144,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 13227
+    },
+    {
+      "epoch": 0.13228,
+      "grad_norm": 1.5037739153801797,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 13228
+    },
+    {
+      "epoch": 0.13229,
+      "grad_norm": 1.0446456028567737,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 13229
+    },
+    {
+      "epoch": 0.1323,
+      "grad_norm": 1.2431434834546604,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 13230
+    },
+    {
+      "epoch": 0.13231,
+      "grad_norm": 1.1595233786574235,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 13231
+    },
+    {
+      "epoch": 0.13232,
+      "grad_norm": 1.23700285851049,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 13232
+    },
+    {
+      "epoch": 0.13233,
+      "grad_norm": 1.2414313005494253,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 13233
+    },
+    {
+      "epoch": 0.13234,
+      "grad_norm": 1.4284272611351443,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 13234
+    },
+    {
+      "epoch": 0.13235,
+      "grad_norm": 1.1272287828969014,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 13235
+    },
+    {
+      "epoch": 0.13236,
+      "grad_norm": 1.0883267700976904,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 13236
+    },
+    {
+      "epoch": 0.13237,
+      "grad_norm": 1.3095687487861305,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 13237
+    },
+    {
+      "epoch": 0.13238,
+      "grad_norm": 1.178701415006733,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 13238
+    },
+    {
+      "epoch": 0.13239,
+      "grad_norm": 1.2027726294396186,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 13239
+    },
+    {
+      "epoch": 0.1324,
+      "grad_norm": 1.2377893040922567,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 13240
+    },
+    {
+      "epoch": 0.13241,
+      "grad_norm": 1.0267119343524598,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 13241
+    },
+    {
+      "epoch": 0.13242,
+      "grad_norm": 1.3012800497905883,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 13242
+    },
+    {
+      "epoch": 0.13243,
+      "grad_norm": 1.1614741672299007,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 13243
+    },
+    {
+      "epoch": 0.13244,
+      "grad_norm": 1.2023819031296115,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 13244
+    },
+    {
+      "epoch": 0.13245,
+      "grad_norm": 1.1969761782597237,
+      "learning_rate": 0.003,
+      "loss": 3.9801,
+      "step": 13245
+    },
+    {
+      "epoch": 0.13246,
+      "grad_norm": 1.1736874650380753,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 13246
+    },
+    {
+      "epoch": 0.13247,
+      "grad_norm": 1.2217478030413764,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 13247
+    },
+    {
+      "epoch": 0.13248,
+      "grad_norm": 1.195487292425333,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 13248
+    },
+    {
+      "epoch": 0.13249,
+      "grad_norm": 1.3531113906627674,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 13249
+    },
+    {
+      "epoch": 0.1325,
+      "grad_norm": 1.1331994653407587,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 13250
+    },
+    {
+      "epoch": 0.13251,
+      "grad_norm": 1.3568915508653279,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 13251
+    },
+    {
+      "epoch": 0.13252,
+      "grad_norm": 1.1022967952260512,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 13252
+    },
+    {
+      "epoch": 0.13253,
+      "grad_norm": 1.4730317933527035,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 13253
+    },
+    {
+      "epoch": 0.13254,
+      "grad_norm": 1.1698133513210476,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 13254
+    },
+    {
+      "epoch": 0.13255,
+      "grad_norm": 1.3006938019160277,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 13255
+    },
+    {
+      "epoch": 0.13256,
+      "grad_norm": 1.1089839963119312,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 13256
+    },
+    {
+      "epoch": 0.13257,
+      "grad_norm": 1.2055086984398902,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 13257
+    },
+    {
+      "epoch": 0.13258,
+      "grad_norm": 1.0517577625863193,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 13258
+    },
+    {
+      "epoch": 0.13259,
+      "grad_norm": 1.0260203890418644,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 13259
+    },
+    {
+      "epoch": 0.1326,
+      "grad_norm": 1.1451620545525771,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 13260
+    },
+    {
+      "epoch": 0.13261,
+      "grad_norm": 1.3573920654937885,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 13261
+    },
+    {
+      "epoch": 0.13262,
+      "grad_norm": 1.2997613200686104,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 13262
+    },
+    {
+      "epoch": 0.13263,
+      "grad_norm": 1.162070533819352,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 13263
+    },
+    {
+      "epoch": 0.13264,
+      "grad_norm": 1.1860611604132822,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 13264
+    },
+    {
+      "epoch": 0.13265,
+      "grad_norm": 1.155878268074166,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 13265
+    },
+    {
+      "epoch": 0.13266,
+      "grad_norm": 1.162897362742571,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 13266
+    },
+    {
+      "epoch": 0.13267,
+      "grad_norm": 1.2566274122158887,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 13267
+    },
+    {
+      "epoch": 0.13268,
+      "grad_norm": 1.0624053962002076,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 13268
+    },
+    {
+      "epoch": 0.13269,
+      "grad_norm": 1.146155331042088,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 13269
+    },
+    {
+      "epoch": 0.1327,
+      "grad_norm": 1.1523816823944344,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 13270
+    },
+    {
+      "epoch": 0.13271,
+      "grad_norm": 1.2822840784270244,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 13271
+    },
+    {
+      "epoch": 0.13272,
+      "grad_norm": 1.0786807154247118,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 13272
+    },
+    {
+      "epoch": 0.13273,
+      "grad_norm": 1.218673056995643,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 13273
+    },
+    {
+      "epoch": 0.13274,
+      "grad_norm": 1.3254738935758086,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 13274
+    },
+    {
+      "epoch": 0.13275,
+      "grad_norm": 1.2485461958782031,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 13275
+    },
+    {
+      "epoch": 0.13276,
+      "grad_norm": 1.0339646828439346,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 13276
+    },
+    {
+      "epoch": 0.13277,
+      "grad_norm": 1.2296936563599417,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 13277
+    },
+    {
+      "epoch": 0.13278,
+      "grad_norm": 1.1719359498015458,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 13278
+    },
+    {
+      "epoch": 0.13279,
+      "grad_norm": 1.2906893737489893,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 13279
+    },
+    {
+      "epoch": 0.1328,
+      "grad_norm": 0.9747260590672138,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 13280
+    },
+    {
+      "epoch": 0.13281,
+      "grad_norm": 1.3295126084209,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 13281
+    },
+    {
+      "epoch": 0.13282,
+      "grad_norm": 1.1543837502411551,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 13282
+    },
+    {
+      "epoch": 0.13283,
+      "grad_norm": 1.3388418810355132,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 13283
+    },
+    {
+      "epoch": 0.13284,
+      "grad_norm": 0.9952350561662376,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 13284
+    },
+    {
+      "epoch": 0.13285,
+      "grad_norm": 1.4499501567801372,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 13285
+    },
+    {
+      "epoch": 0.13286,
+      "grad_norm": 1.066590029594129,
+      "learning_rate": 0.003,
+      "loss": 4.0555,
+      "step": 13286
+    },
+    {
+      "epoch": 0.13287,
+      "grad_norm": 1.388542523326155,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 13287
+    },
+    {
+      "epoch": 0.13288,
+      "grad_norm": 1.242952241814662,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 13288
+    },
+    {
+      "epoch": 0.13289,
+      "grad_norm": 1.2844622720087564,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 13289
+    },
+    {
+      "epoch": 0.1329,
+      "grad_norm": 1.1296034429914248,
+      "learning_rate": 0.003,
+      "loss": 4.0495,
+      "step": 13290
+    },
+    {
+      "epoch": 0.13291,
+      "grad_norm": 1.1817733386973799,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 13291
+    },
+    {
+      "epoch": 0.13292,
+      "grad_norm": 1.227418218840078,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 13292
+    },
+    {
+      "epoch": 0.13293,
+      "grad_norm": 1.2934766559092665,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 13293
+    },
+    {
+      "epoch": 0.13294,
+      "grad_norm": 1.2324210240838256,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 13294
+    },
+    {
+      "epoch": 0.13295,
+      "grad_norm": 1.1190504319413743,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 13295
+    },
+    {
+      "epoch": 0.13296,
+      "grad_norm": 1.3831470071383796,
+      "learning_rate": 0.003,
+      "loss": 4.0655,
+      "step": 13296
+    },
+    {
+      "epoch": 0.13297,
+      "grad_norm": 0.9024796111158896,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 13297
+    },
+    {
+      "epoch": 0.13298,
+      "grad_norm": 1.3130099662344958,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 13298
+    },
+    {
+      "epoch": 0.13299,
+      "grad_norm": 1.1678197485533626,
+      "learning_rate": 0.003,
+      "loss": 4.0711,
+      "step": 13299
+    },
+    {
+      "epoch": 0.133,
+      "grad_norm": 1.2927678212090512,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 13300
+    },
+    {
+      "epoch": 0.13301,
+      "grad_norm": 1.1463633834307947,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 13301
+    },
+    {
+      "epoch": 0.13302,
+      "grad_norm": 1.3051173709107187,
+      "learning_rate": 0.003,
+      "loss": 3.9841,
+      "step": 13302
+    },
+    {
+      "epoch": 0.13303,
+      "grad_norm": 1.136023830327464,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 13303
+    },
+    {
+      "epoch": 0.13304,
+      "grad_norm": 1.231542541977393,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 13304
+    },
+    {
+      "epoch": 0.13305,
+      "grad_norm": 1.0420078614674835,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 13305
+    },
+    {
+      "epoch": 0.13306,
+      "grad_norm": 1.4456939860953055,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 13306
+    },
+    {
+      "epoch": 0.13307,
+      "grad_norm": 0.9463696467956895,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 13307
+    },
+    {
+      "epoch": 0.13308,
+      "grad_norm": 1.372066712252764,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 13308
+    },
+    {
+      "epoch": 0.13309,
+      "grad_norm": 1.2982308263401467,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 13309
+    },
+    {
+      "epoch": 0.1331,
+      "grad_norm": 1.3021473169835807,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 13310
+    },
+    {
+      "epoch": 0.13311,
+      "grad_norm": 1.1181520647256893,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 13311
+    },
+    {
+      "epoch": 0.13312,
+      "grad_norm": 1.1323773951622496,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 13312
+    },
+    {
+      "epoch": 0.13313,
+      "grad_norm": 1.2994672754854095,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 13313
+    },
+    {
+      "epoch": 0.13314,
+      "grad_norm": 1.1123813052260192,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 13314
+    },
+    {
+      "epoch": 0.13315,
+      "grad_norm": 1.3356269811242296,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 13315
+    },
+    {
+      "epoch": 0.13316,
+      "grad_norm": 1.1307571245786385,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 13316
+    },
+    {
+      "epoch": 0.13317,
+      "grad_norm": 1.1289097193017215,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 13317
+    },
+    {
+      "epoch": 0.13318,
+      "grad_norm": 1.2008083845027793,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 13318
+    },
+    {
+      "epoch": 0.13319,
+      "grad_norm": 1.1306337491064893,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 13319
+    },
+    {
+      "epoch": 0.1332,
+      "grad_norm": 1.3679305192275055,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 13320
+    },
+    {
+      "epoch": 0.13321,
+      "grad_norm": 1.1711826264907252,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 13321
+    },
+    {
+      "epoch": 0.13322,
+      "grad_norm": 1.0966041565646945,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 13322
+    },
+    {
+      "epoch": 0.13323,
+      "grad_norm": 1.1551860383609442,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 13323
+    },
+    {
+      "epoch": 0.13324,
+      "grad_norm": 1.0761459065829688,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 13324
+    },
+    {
+      "epoch": 0.13325,
+      "grad_norm": 1.1855031922305135,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 13325
+    },
+    {
+      "epoch": 0.13326,
+      "grad_norm": 1.358087467790536,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 13326
+    },
+    {
+      "epoch": 0.13327,
+      "grad_norm": 1.5789407226797987,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 13327
+    },
+    {
+      "epoch": 0.13328,
+      "grad_norm": 1.0846902206425766,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 13328
+    },
+    {
+      "epoch": 0.13329,
+      "grad_norm": 1.166482834679772,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 13329
+    },
+    {
+      "epoch": 0.1333,
+      "grad_norm": 1.1676348442033728,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 13330
+    },
+    {
+      "epoch": 0.13331,
+      "grad_norm": 1.2387302200322514,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 13331
+    },
+    {
+      "epoch": 0.13332,
+      "grad_norm": 1.090022503966062,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 13332
+    },
+    {
+      "epoch": 0.13333,
+      "grad_norm": 1.1993557738278178,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 13333
+    },
+    {
+      "epoch": 0.13334,
+      "grad_norm": 1.149779889121198,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 13334
+    },
+    {
+      "epoch": 0.13335,
+      "grad_norm": 1.2404908878452907,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 13335
+    },
+    {
+      "epoch": 0.13336,
+      "grad_norm": 1.1097718936919116,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 13336
+    },
+    {
+      "epoch": 0.13337,
+      "grad_norm": 1.3923059951933716,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 13337
+    },
+    {
+      "epoch": 0.13338,
+      "grad_norm": 1.2188422973936779,
+      "learning_rate": 0.003,
+      "loss": 4.0581,
+      "step": 13338
+    },
+    {
+      "epoch": 0.13339,
+      "grad_norm": 1.4947926128477245,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 13339
+    },
+    {
+      "epoch": 0.1334,
+      "grad_norm": 1.135167360467982,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 13340
+    },
+    {
+      "epoch": 0.13341,
+      "grad_norm": 1.2957735877895509,
+      "learning_rate": 0.003,
+      "loss": 4.0615,
+      "step": 13341
+    },
+    {
+      "epoch": 0.13342,
+      "grad_norm": 1.0937534826464805,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 13342
+    },
+    {
+      "epoch": 0.13343,
+      "grad_norm": 1.146491272433081,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 13343
+    },
+    {
+      "epoch": 0.13344,
+      "grad_norm": 1.4468045189337417,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 13344
+    },
+    {
+      "epoch": 0.13345,
+      "grad_norm": 1.2592429566456962,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 13345
+    },
+    {
+      "epoch": 0.13346,
+      "grad_norm": 1.2143728986915379,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 13346
+    },
+    {
+      "epoch": 0.13347,
+      "grad_norm": 1.1905439376198783,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 13347
+    },
+    {
+      "epoch": 0.13348,
+      "grad_norm": 1.1443391482742789,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 13348
+    },
+    {
+      "epoch": 0.13349,
+      "grad_norm": 1.246186743392689,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 13349
+    },
+    {
+      "epoch": 0.1335,
+      "grad_norm": 1.0806035133689187,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 13350
+    },
+    {
+      "epoch": 0.13351,
+      "grad_norm": 1.123461988084758,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 13351
+    },
+    {
+      "epoch": 0.13352,
+      "grad_norm": 1.1992586433931847,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 13352
+    },
+    {
+      "epoch": 0.13353,
+      "grad_norm": 1.2287700219343816,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 13353
+    },
+    {
+      "epoch": 0.13354,
+      "grad_norm": 1.145713010144192,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 13354
+    },
+    {
+      "epoch": 0.13355,
+      "grad_norm": 1.3129522428667089,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 13355
+    },
+    {
+      "epoch": 0.13356,
+      "grad_norm": 1.1217225093906096,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 13356
+    },
+    {
+      "epoch": 0.13357,
+      "grad_norm": 1.3610017363079503,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 13357
+    },
+    {
+      "epoch": 0.13358,
+      "grad_norm": 1.0659946872902204,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 13358
+    },
+    {
+      "epoch": 0.13359,
+      "grad_norm": 1.4579500688790328,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 13359
+    },
+    {
+      "epoch": 0.1336,
+      "grad_norm": 1.003934981079245,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 13360
+    },
+    {
+      "epoch": 0.13361,
+      "grad_norm": 1.1638155922468505,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 13361
+    },
+    {
+      "epoch": 0.13362,
+      "grad_norm": 1.0889598114731738,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 13362
+    },
+    {
+      "epoch": 0.13363,
+      "grad_norm": 1.3693945445048112,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 13363
+    },
+    {
+      "epoch": 0.13364,
+      "grad_norm": 1.0988775373011543,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 13364
+    },
+    {
+      "epoch": 0.13365,
+      "grad_norm": 1.4802228295419162,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 13365
+    },
+    {
+      "epoch": 0.13366,
+      "grad_norm": 1.1350414757360303,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 13366
+    },
+    {
+      "epoch": 0.13367,
+      "grad_norm": 1.0970300311081125,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 13367
+    },
+    {
+      "epoch": 0.13368,
+      "grad_norm": 1.2364580439070338,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 13368
+    },
+    {
+      "epoch": 0.13369,
+      "grad_norm": 1.1393845065668011,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 13369
+    },
+    {
+      "epoch": 0.1337,
+      "grad_norm": 1.1533636621662187,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 13370
+    },
+    {
+      "epoch": 0.13371,
+      "grad_norm": 1.0816471655884832,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 13371
+    },
+    {
+      "epoch": 0.13372,
+      "grad_norm": 1.193792088943506,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 13372
+    },
+    {
+      "epoch": 0.13373,
+      "grad_norm": 1.1094174710891824,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 13373
+    },
+    {
+      "epoch": 0.13374,
+      "grad_norm": 1.2879292981648436,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 13374
+    },
+    {
+      "epoch": 0.13375,
+      "grad_norm": 1.1453786363334113,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 13375
+    },
+    {
+      "epoch": 0.13376,
+      "grad_norm": 1.3055465273095053,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 13376
+    },
+    {
+      "epoch": 0.13377,
+      "grad_norm": 1.1532149751861094,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 13377
+    },
+    {
+      "epoch": 0.13378,
+      "grad_norm": 1.2267252708924727,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 13378
+    },
+    {
+      "epoch": 0.13379,
+      "grad_norm": 1.2465260797365454,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 13379
+    },
+    {
+      "epoch": 0.1338,
+      "grad_norm": 1.1671338719435786,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 13380
+    },
+    {
+      "epoch": 0.13381,
+      "grad_norm": 1.2770301462955111,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 13381
+    },
+    {
+      "epoch": 0.13382,
+      "grad_norm": 1.3607247962128857,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 13382
+    },
+    {
+      "epoch": 0.13383,
+      "grad_norm": 1.036366389732506,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 13383
+    },
+    {
+      "epoch": 0.13384,
+      "grad_norm": 1.2065138074211352,
+      "learning_rate": 0.003,
+      "loss": 3.9827,
+      "step": 13384
+    },
+    {
+      "epoch": 0.13385,
+      "grad_norm": 1.1857547899204484,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 13385
+    },
+    {
+      "epoch": 0.13386,
+      "grad_norm": 0.946051102788784,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 13386
+    },
+    {
+      "epoch": 0.13387,
+      "grad_norm": 1.1633476212241936,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 13387
+    },
+    {
+      "epoch": 0.13388,
+      "grad_norm": 1.3245812612445407,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 13388
+    },
+    {
+      "epoch": 0.13389,
+      "grad_norm": 1.134263149423251,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 13389
+    },
+    {
+      "epoch": 0.1339,
+      "grad_norm": 1.296265090606207,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 13390
+    },
+    {
+      "epoch": 0.13391,
+      "grad_norm": 1.1360413290115436,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 13391
+    },
+    {
+      "epoch": 0.13392,
+      "grad_norm": 1.291707542924984,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 13392
+    },
+    {
+      "epoch": 0.13393,
+      "grad_norm": 1.076626333520554,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 13393
+    },
+    {
+      "epoch": 0.13394,
+      "grad_norm": 1.2115304300049723,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 13394
+    },
+    {
+      "epoch": 0.13395,
+      "grad_norm": 1.126113520409547,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 13395
+    },
+    {
+      "epoch": 0.13396,
+      "grad_norm": 1.2100032511507974,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 13396
+    },
+    {
+      "epoch": 0.13397,
+      "grad_norm": 1.1413697699504317,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 13397
+    },
+    {
+      "epoch": 0.13398,
+      "grad_norm": 1.1369134666653362,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 13398
+    },
+    {
+      "epoch": 0.13399,
+      "grad_norm": 1.141769147643521,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 13399
+    },
+    {
+      "epoch": 0.134,
+      "grad_norm": 1.351880588084034,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 13400
+    },
+    {
+      "epoch": 0.13401,
+      "grad_norm": 1.0694809056532624,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 13401
+    },
+    {
+      "epoch": 0.13402,
+      "grad_norm": 1.2462804234100007,
+      "learning_rate": 0.003,
+      "loss": 3.9751,
+      "step": 13402
+    },
+    {
+      "epoch": 0.13403,
+      "grad_norm": 0.9550894770645565,
+      "learning_rate": 0.003,
+      "loss": 4.0535,
+      "step": 13403
+    },
+    {
+      "epoch": 0.13404,
+      "grad_norm": 1.4004013738063046,
+      "learning_rate": 0.003,
+      "loss": 4.0564,
+      "step": 13404
+    },
+    {
+      "epoch": 0.13405,
+      "grad_norm": 1.0182565499258522,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 13405
+    },
+    {
+      "epoch": 0.13406,
+      "grad_norm": 1.3389242782316388,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 13406
+    },
+    {
+      "epoch": 0.13407,
+      "grad_norm": 1.265463794551261,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 13407
+    },
+    {
+      "epoch": 0.13408,
+      "grad_norm": 1.334487167757332,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 13408
+    },
+    {
+      "epoch": 0.13409,
+      "grad_norm": 1.3576258794005125,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 13409
+    },
+    {
+      "epoch": 0.1341,
+      "grad_norm": 1.2184686143115655,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 13410
+    },
+    {
+      "epoch": 0.13411,
+      "grad_norm": 0.9916996275319055,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 13411
+    },
+    {
+      "epoch": 0.13412,
+      "grad_norm": 1.2216044612518433,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 13412
+    },
+    {
+      "epoch": 0.13413,
+      "grad_norm": 1.160783345128535,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 13413
+    },
+    {
+      "epoch": 0.13414,
+      "grad_norm": 1.1292722771811052,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 13414
+    },
+    {
+      "epoch": 0.13415,
+      "grad_norm": 1.126718355717055,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 13415
+    },
+    {
+      "epoch": 0.13416,
+      "grad_norm": 1.2476269931115544,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 13416
+    },
+    {
+      "epoch": 0.13417,
+      "grad_norm": 1.2048295152923227,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 13417
+    },
+    {
+      "epoch": 0.13418,
+      "grad_norm": 1.3428948792281432,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 13418
+    },
+    {
+      "epoch": 0.13419,
+      "grad_norm": 1.1181666148157166,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 13419
+    },
+    {
+      "epoch": 0.1342,
+      "grad_norm": 1.3215144632242453,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 13420
+    },
+    {
+      "epoch": 0.13421,
+      "grad_norm": 1.3097747341982395,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 13421
+    },
+    {
+      "epoch": 0.13422,
+      "grad_norm": 1.198201371504427,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 13422
+    },
+    {
+      "epoch": 0.13423,
+      "grad_norm": 1.2244440779414874,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 13423
+    },
+    {
+      "epoch": 0.13424,
+      "grad_norm": 0.9789279406920552,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 13424
+    },
+    {
+      "epoch": 0.13425,
+      "grad_norm": 1.419037471910816,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 13425
+    },
+    {
+      "epoch": 0.13426,
+      "grad_norm": 1.0024421860356587,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 13426
+    },
+    {
+      "epoch": 0.13427,
+      "grad_norm": 1.236195542106977,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 13427
+    },
+    {
+      "epoch": 0.13428,
+      "grad_norm": 1.2230106966663075,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 13428
+    },
+    {
+      "epoch": 0.13429,
+      "grad_norm": 1.2019836590046975,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 13429
+    },
+    {
+      "epoch": 0.1343,
+      "grad_norm": 1.4494027288332898,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 13430
+    },
+    {
+      "epoch": 0.13431,
+      "grad_norm": 1.192145526605227,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 13431
+    },
+    {
+      "epoch": 0.13432,
+      "grad_norm": 1.1248484191707095,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 13432
+    },
+    {
+      "epoch": 0.13433,
+      "grad_norm": 1.260785974024862,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 13433
+    },
+    {
+      "epoch": 0.13434,
+      "grad_norm": 1.0400060793363335,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 13434
+    },
+    {
+      "epoch": 0.13435,
+      "grad_norm": 1.0783745236421138,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 13435
+    },
+    {
+      "epoch": 0.13436,
+      "grad_norm": 1.165074049861491,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 13436
+    },
+    {
+      "epoch": 0.13437,
+      "grad_norm": 1.162590117493891,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 13437
+    },
+    {
+      "epoch": 0.13438,
+      "grad_norm": 1.0904670998164137,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 13438
+    },
+    {
+      "epoch": 0.13439,
+      "grad_norm": 1.3044658478058948,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 13439
+    },
+    {
+      "epoch": 0.1344,
+      "grad_norm": 1.2585749984772712,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 13440
+    },
+    {
+      "epoch": 0.13441,
+      "grad_norm": 1.1350577146238163,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 13441
+    },
+    {
+      "epoch": 0.13442,
+      "grad_norm": 1.2934824566661551,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 13442
+    },
+    {
+      "epoch": 0.13443,
+      "grad_norm": 1.1704384561374903,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 13443
+    },
+    {
+      "epoch": 0.13444,
+      "grad_norm": 1.2287067692197493,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 13444
+    },
+    {
+      "epoch": 0.13445,
+      "grad_norm": 1.052312126159789,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 13445
+    },
+    {
+      "epoch": 0.13446,
+      "grad_norm": 1.3351631871063565,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 13446
+    },
+    {
+      "epoch": 0.13447,
+      "grad_norm": 1.1456741213952397,
+      "learning_rate": 0.003,
+      "loss": 4.0591,
+      "step": 13447
+    },
+    {
+      "epoch": 0.13448,
+      "grad_norm": 1.3178994702812536,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 13448
+    },
+    {
+      "epoch": 0.13449,
+      "grad_norm": 1.1376435500601898,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 13449
+    },
+    {
+      "epoch": 0.1345,
+      "grad_norm": 1.5577330564326097,
+      "learning_rate": 0.003,
+      "loss": 3.9742,
+      "step": 13450
+    },
+    {
+      "epoch": 0.13451,
+      "grad_norm": 1.240799192639951,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 13451
+    },
+    {
+      "epoch": 0.13452,
+      "grad_norm": 1.3303368261452497,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 13452
+    },
+    {
+      "epoch": 0.13453,
+      "grad_norm": 0.9864499235298809,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 13453
+    },
+    {
+      "epoch": 0.13454,
+      "grad_norm": 1.3913306919812294,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 13454
+    },
+    {
+      "epoch": 0.13455,
+      "grad_norm": 0.9262772369745972,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 13455
+    },
+    {
+      "epoch": 0.13456,
+      "grad_norm": 1.1715238243590518,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 13456
+    },
+    {
+      "epoch": 0.13457,
+      "grad_norm": 1.007775791407712,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 13457
+    },
+    {
+      "epoch": 0.13458,
+      "grad_norm": 1.4373867057512402,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 13458
+    },
+    {
+      "epoch": 0.13459,
+      "grad_norm": 1.2087819192138398,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 13459
+    },
+    {
+      "epoch": 0.1346,
+      "grad_norm": 1.2761546089109206,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 13460
+    },
+    {
+      "epoch": 0.13461,
+      "grad_norm": 1.3195149048198116,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 13461
+    },
+    {
+      "epoch": 0.13462,
+      "grad_norm": 1.4298685154422084,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 13462
+    },
+    {
+      "epoch": 0.13463,
+      "grad_norm": 1.0232005094391101,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 13463
+    },
+    {
+      "epoch": 0.13464,
+      "grad_norm": 1.3554979757755057,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 13464
+    },
+    {
+      "epoch": 0.13465,
+      "grad_norm": 1.1332202658140447,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 13465
+    },
+    {
+      "epoch": 0.13466,
+      "grad_norm": 1.3885916310764128,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 13466
+    },
+    {
+      "epoch": 0.13467,
+      "grad_norm": 1.1009272857461163,
+      "learning_rate": 0.003,
+      "loss": 4.048,
+      "step": 13467
+    },
+    {
+      "epoch": 0.13468,
+      "grad_norm": 1.291927423311447,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 13468
+    },
+    {
+      "epoch": 0.13469,
+      "grad_norm": 1.2553231921644208,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 13469
+    },
+    {
+      "epoch": 0.1347,
+      "grad_norm": 1.5595510592158073,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 13470
+    },
+    {
+      "epoch": 0.13471,
+      "grad_norm": 1.4597265580853351,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 13471
+    },
+    {
+      "epoch": 0.13472,
+      "grad_norm": 1.06169793876242,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 13472
+    },
+    {
+      "epoch": 0.13473,
+      "grad_norm": 1.1661843523771311,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 13473
+    },
+    {
+      "epoch": 0.13474,
+      "grad_norm": 1.3141625224505493,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 13474
+    },
+    {
+      "epoch": 0.13475,
+      "grad_norm": 1.0191480284471333,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 13475
+    },
+    {
+      "epoch": 0.13476,
+      "grad_norm": 1.3593921109749711,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 13476
+    },
+    {
+      "epoch": 0.13477,
+      "grad_norm": 1.029868013350082,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 13477
+    },
+    {
+      "epoch": 0.13478,
+      "grad_norm": 1.220297263485053,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 13478
+    },
+    {
+      "epoch": 0.13479,
+      "grad_norm": 1.2216981759838992,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 13479
+    },
+    {
+      "epoch": 0.1348,
+      "grad_norm": 1.1588932875116496,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 13480
+    },
+    {
+      "epoch": 0.13481,
+      "grad_norm": 1.04814139318589,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 13481
+    },
+    {
+      "epoch": 0.13482,
+      "grad_norm": 1.27331759602881,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 13482
+    },
+    {
+      "epoch": 0.13483,
+      "grad_norm": 1.2024848177421057,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 13483
+    },
+    {
+      "epoch": 0.13484,
+      "grad_norm": 1.1441160988818182,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 13484
+    },
+    {
+      "epoch": 0.13485,
+      "grad_norm": 1.2492437430898804,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 13485
+    },
+    {
+      "epoch": 0.13486,
+      "grad_norm": 1.2067132902882773,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 13486
+    },
+    {
+      "epoch": 0.13487,
+      "grad_norm": 1.1942540105040946,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 13487
+    },
+    {
+      "epoch": 0.13488,
+      "grad_norm": 1.3019162946617349,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 13488
+    },
+    {
+      "epoch": 0.13489,
+      "grad_norm": 1.076361817110379,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 13489
+    },
+    {
+      "epoch": 0.1349,
+      "grad_norm": 1.0367542395991136,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 13490
+    },
+    {
+      "epoch": 0.13491,
+      "grad_norm": 1.1496630472442426,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 13491
+    },
+    {
+      "epoch": 0.13492,
+      "grad_norm": 1.4714537384829591,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 13492
+    },
+    {
+      "epoch": 0.13493,
+      "grad_norm": 1.2095126352691563,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 13493
+    },
+    {
+      "epoch": 0.13494,
+      "grad_norm": 1.2689510126659043,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 13494
+    },
+    {
+      "epoch": 0.13495,
+      "grad_norm": 1.1657755327513644,
+      "learning_rate": 0.003,
+      "loss": 4.0548,
+      "step": 13495
+    },
+    {
+      "epoch": 0.13496,
+      "grad_norm": 1.1697449439005412,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 13496
+    },
+    {
+      "epoch": 0.13497,
+      "grad_norm": 1.2346266760881337,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 13497
+    },
+    {
+      "epoch": 0.13498,
+      "grad_norm": 1.6136535135218877,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 13498
+    },
+    {
+      "epoch": 0.13499,
+      "grad_norm": 1.1067394699171362,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 13499
+    },
+    {
+      "epoch": 0.135,
+      "grad_norm": 1.266309341268125,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 13500
+    },
+    {
+      "epoch": 0.13501,
+      "grad_norm": 1.2728591837186685,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 13501
+    },
+    {
+      "epoch": 0.13502,
+      "grad_norm": 1.2093142337945428,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 13502
+    },
+    {
+      "epoch": 0.13503,
+      "grad_norm": 1.0891243791379945,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 13503
+    },
+    {
+      "epoch": 0.13504,
+      "grad_norm": 1.121546991957497,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 13504
+    },
+    {
+      "epoch": 0.13505,
+      "grad_norm": 1.290359557449459,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 13505
+    },
+    {
+      "epoch": 0.13506,
+      "grad_norm": 1.1792088578482778,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 13506
+    },
+    {
+      "epoch": 0.13507,
+      "grad_norm": 1.2456337380797515,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 13507
+    },
+    {
+      "epoch": 0.13508,
+      "grad_norm": 1.4020559997019646,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 13508
+    },
+    {
+      "epoch": 0.13509,
+      "grad_norm": 1.1533109080710795,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 13509
+    },
+    {
+      "epoch": 0.1351,
+      "grad_norm": 1.2745718197348026,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 13510
+    },
+    {
+      "epoch": 0.13511,
+      "grad_norm": 1.3014225992516306,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 13511
+    },
+    {
+      "epoch": 0.13512,
+      "grad_norm": 1.0258185120614944,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 13512
+    },
+    {
+      "epoch": 0.13513,
+      "grad_norm": 1.2987664783952992,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 13513
+    },
+    {
+      "epoch": 0.13514,
+      "grad_norm": 1.0301254210890254,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 13514
+    },
+    {
+      "epoch": 0.13515,
+      "grad_norm": 1.4063675226394983,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 13515
+    },
+    {
+      "epoch": 0.13516,
+      "grad_norm": 1.06062614177715,
+      "learning_rate": 0.003,
+      "loss": 3.9832,
+      "step": 13516
+    },
+    {
+      "epoch": 0.13517,
+      "grad_norm": 1.2326757174346914,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 13517
+    },
+    {
+      "epoch": 0.13518,
+      "grad_norm": 0.9967612734492626,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 13518
+    },
+    {
+      "epoch": 0.13519,
+      "grad_norm": 1.3638494530839866,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 13519
+    },
+    {
+      "epoch": 0.1352,
+      "grad_norm": 0.9993900606979094,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 13520
+    },
+    {
+      "epoch": 0.13521,
+      "grad_norm": 1.2647391132461463,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 13521
+    },
+    {
+      "epoch": 0.13522,
+      "grad_norm": 1.076840147795485,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 13522
+    },
+    {
+      "epoch": 0.13523,
+      "grad_norm": 1.536667316815823,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 13523
+    },
+    {
+      "epoch": 0.13524,
+      "grad_norm": 1.1907360604879682,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 13524
+    },
+    {
+      "epoch": 0.13525,
+      "grad_norm": 1.1183691818173742,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 13525
+    },
+    {
+      "epoch": 0.13526,
+      "grad_norm": 1.1958633957563631,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 13526
+    },
+    {
+      "epoch": 0.13527,
+      "grad_norm": 1.1733058300297852,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 13527
+    },
+    {
+      "epoch": 0.13528,
+      "grad_norm": 1.2818225476163603,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 13528
+    },
+    {
+      "epoch": 0.13529,
+      "grad_norm": 1.40427771743178,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 13529
+    },
+    {
+      "epoch": 0.1353,
+      "grad_norm": 1.0559809394022808,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 13530
+    },
+    {
+      "epoch": 0.13531,
+      "grad_norm": 1.2141375395885508,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 13531
+    },
+    {
+      "epoch": 0.13532,
+      "grad_norm": 1.1631699152071382,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 13532
+    },
+    {
+      "epoch": 0.13533,
+      "grad_norm": 1.1435788324807838,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 13533
+    },
+    {
+      "epoch": 0.13534,
+      "grad_norm": 1.0902001010477733,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 13534
+    },
+    {
+      "epoch": 0.13535,
+      "grad_norm": 1.3308338913254787,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 13535
+    },
+    {
+      "epoch": 0.13536,
+      "grad_norm": 1.1546192103612023,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 13536
+    },
+    {
+      "epoch": 0.13537,
+      "grad_norm": 1.0527740548229891,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 13537
+    },
+    {
+      "epoch": 0.13538,
+      "grad_norm": 1.3938719152318257,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 13538
+    },
+    {
+      "epoch": 0.13539,
+      "grad_norm": 1.294181469325066,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 13539
+    },
+    {
+      "epoch": 0.1354,
+      "grad_norm": 1.3313605648250066,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 13540
+    },
+    {
+      "epoch": 0.13541,
+      "grad_norm": 1.1090694180026441,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 13541
+    },
+    {
+      "epoch": 0.13542,
+      "grad_norm": 1.2566169447470952,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 13542
+    },
+    {
+      "epoch": 0.13543,
+      "grad_norm": 1.1364640575819667,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 13543
+    },
+    {
+      "epoch": 0.13544,
+      "grad_norm": 1.238508893201822,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 13544
+    },
+    {
+      "epoch": 0.13545,
+      "grad_norm": 1.3426050575470458,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 13545
+    },
+    {
+      "epoch": 0.13546,
+      "grad_norm": 1.1224828710678576,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 13546
+    },
+    {
+      "epoch": 0.13547,
+      "grad_norm": 1.4489054917469295,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 13547
+    },
+    {
+      "epoch": 0.13548,
+      "grad_norm": 0.9563686440940138,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 13548
+    },
+    {
+      "epoch": 0.13549,
+      "grad_norm": 1.2742319396351343,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 13549
+    },
+    {
+      "epoch": 0.1355,
+      "grad_norm": 1.2331965104642149,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 13550
+    },
+    {
+      "epoch": 0.13551,
+      "grad_norm": 1.1910295938637434,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 13551
+    },
+    {
+      "epoch": 0.13552,
+      "grad_norm": 1.2063895895243764,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 13552
+    },
+    {
+      "epoch": 0.13553,
+      "grad_norm": 1.3495585085551443,
+      "learning_rate": 0.003,
+      "loss": 4.058,
+      "step": 13553
+    },
+    {
+      "epoch": 0.13554,
+      "grad_norm": 1.3261178312946502,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 13554
+    },
+    {
+      "epoch": 0.13555,
+      "grad_norm": 1.2671382357999805,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 13555
+    },
+    {
+      "epoch": 0.13556,
+      "grad_norm": 1.013440546564239,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 13556
+    },
+    {
+      "epoch": 0.13557,
+      "grad_norm": 1.358405967910683,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 13557
+    },
+    {
+      "epoch": 0.13558,
+      "grad_norm": 0.9529425587072567,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 13558
+    },
+    {
+      "epoch": 0.13559,
+      "grad_norm": 1.3926067932351842,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 13559
+    },
+    {
+      "epoch": 0.1356,
+      "grad_norm": 1.1927999409657768,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 13560
+    },
+    {
+      "epoch": 0.13561,
+      "grad_norm": 1.2975418476490068,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 13561
+    },
+    {
+      "epoch": 0.13562,
+      "grad_norm": 1.268040128104862,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 13562
+    },
+    {
+      "epoch": 0.13563,
+      "grad_norm": 1.068965547878699,
+      "learning_rate": 0.003,
+      "loss": 3.9682,
+      "step": 13563
+    },
+    {
+      "epoch": 0.13564,
+      "grad_norm": 1.2462949090924584,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 13564
+    },
+    {
+      "epoch": 0.13565,
+      "grad_norm": 1.0262326038063978,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 13565
+    },
+    {
+      "epoch": 0.13566,
+      "grad_norm": 1.241331896159747,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 13566
+    },
+    {
+      "epoch": 0.13567,
+      "grad_norm": 1.106533598252521,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 13567
+    },
+    {
+      "epoch": 0.13568,
+      "grad_norm": 1.2845913986916169,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 13568
+    },
+    {
+      "epoch": 0.13569,
+      "grad_norm": 1.0934200649882952,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 13569
+    },
+    {
+      "epoch": 0.1357,
+      "grad_norm": 1.3469028866488575,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 13570
+    },
+    {
+      "epoch": 0.13571,
+      "grad_norm": 1.1323505460539465,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 13571
+    },
+    {
+      "epoch": 0.13572,
+      "grad_norm": 1.3450317600710857,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 13572
+    },
+    {
+      "epoch": 0.13573,
+      "grad_norm": 1.217539370131012,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 13573
+    },
+    {
+      "epoch": 0.13574,
+      "grad_norm": 1.3619147322432303,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 13574
+    },
+    {
+      "epoch": 0.13575,
+      "grad_norm": 1.2943028459713686,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 13575
+    },
+    {
+      "epoch": 0.13576,
+      "grad_norm": 1.056776897622436,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 13576
+    },
+    {
+      "epoch": 0.13577,
+      "grad_norm": 1.372028195433907,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 13577
+    },
+    {
+      "epoch": 0.13578,
+      "grad_norm": 1.0412699789951014,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 13578
+    },
+    {
+      "epoch": 0.13579,
+      "grad_norm": 1.375918893580882,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 13579
+    },
+    {
+      "epoch": 0.1358,
+      "grad_norm": 1.0483388215012421,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 13580
+    },
+    {
+      "epoch": 0.13581,
+      "grad_norm": 1.2980414345055213,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 13581
+    },
+    {
+      "epoch": 0.13582,
+      "grad_norm": 1.0353181140648553,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 13582
+    },
+    {
+      "epoch": 0.13583,
+      "grad_norm": 1.265469273405755,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 13583
+    },
+    {
+      "epoch": 0.13584,
+      "grad_norm": 1.0112357228896525,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 13584
+    },
+    {
+      "epoch": 0.13585,
+      "grad_norm": 1.343005759231974,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 13585
+    },
+    {
+      "epoch": 0.13586,
+      "grad_norm": 1.2175197887500093,
+      "learning_rate": 0.003,
+      "loss": 4.0661,
+      "step": 13586
+    },
+    {
+      "epoch": 0.13587,
+      "grad_norm": 1.179478814557246,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 13587
+    },
+    {
+      "epoch": 0.13588,
+      "grad_norm": 1.307292876356786,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 13588
+    },
+    {
+      "epoch": 0.13589,
+      "grad_norm": 1.2162754598432555,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 13589
+    },
+    {
+      "epoch": 0.1359,
+      "grad_norm": 1.2257717680755864,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 13590
+    },
+    {
+      "epoch": 0.13591,
+      "grad_norm": 1.3668360738196097,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 13591
+    },
+    {
+      "epoch": 0.13592,
+      "grad_norm": 1.1463625785724558,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 13592
+    },
+    {
+      "epoch": 0.13593,
+      "grad_norm": 1.2555792951194609,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 13593
+    },
+    {
+      "epoch": 0.13594,
+      "grad_norm": 1.136425816545672,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 13594
+    },
+    {
+      "epoch": 0.13595,
+      "grad_norm": 1.3017380885036873,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 13595
+    },
+    {
+      "epoch": 0.13596,
+      "grad_norm": 1.2390442231201604,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 13596
+    },
+    {
+      "epoch": 0.13597,
+      "grad_norm": 1.0792428908460772,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 13597
+    },
+    {
+      "epoch": 0.13598,
+      "grad_norm": 1.1627665380886698,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 13598
+    },
+    {
+      "epoch": 0.13599,
+      "grad_norm": 1.0891261570324686,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 13599
+    },
+    {
+      "epoch": 0.136,
+      "grad_norm": 1.3290981851394779,
+      "learning_rate": 0.003,
+      "loss": 3.9806,
+      "step": 13600
+    },
+    {
+      "epoch": 0.13601,
+      "grad_norm": 1.1191130692722162,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 13601
+    },
+    {
+      "epoch": 0.13602,
+      "grad_norm": 1.14018704398269,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 13602
+    },
+    {
+      "epoch": 0.13603,
+      "grad_norm": 1.4134013940831658,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 13603
+    },
+    {
+      "epoch": 0.13604,
+      "grad_norm": 0.9919395717353615,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 13604
+    },
+    {
+      "epoch": 0.13605,
+      "grad_norm": 1.4685076537988184,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 13605
+    },
+    {
+      "epoch": 0.13606,
+      "grad_norm": 1.3043209564442602,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 13606
+    },
+    {
+      "epoch": 0.13607,
+      "grad_norm": 1.2720661225646608,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 13607
+    },
+    {
+      "epoch": 0.13608,
+      "grad_norm": 1.1218482561071894,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 13608
+    },
+    {
+      "epoch": 0.13609,
+      "grad_norm": 1.1870233607488427,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 13609
+    },
+    {
+      "epoch": 0.1361,
+      "grad_norm": 1.4120658302714315,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 13610
+    },
+    {
+      "epoch": 0.13611,
+      "grad_norm": 1.068516624222778,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 13611
+    },
+    {
+      "epoch": 0.13612,
+      "grad_norm": 1.245146046438301,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 13612
+    },
+    {
+      "epoch": 0.13613,
+      "grad_norm": 1.145508954883413,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 13613
+    },
+    {
+      "epoch": 0.13614,
+      "grad_norm": 1.055471911251265,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 13614
+    },
+    {
+      "epoch": 0.13615,
+      "grad_norm": 1.1450571637294198,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 13615
+    },
+    {
+      "epoch": 0.13616,
+      "grad_norm": 1.148179332102041,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 13616
+    },
+    {
+      "epoch": 0.13617,
+      "grad_norm": 1.2645193541346786,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 13617
+    },
+    {
+      "epoch": 0.13618,
+      "grad_norm": 1.3830179107987441,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 13618
+    },
+    {
+      "epoch": 0.13619,
+      "grad_norm": 1.1986654364411988,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 13619
+    },
+    {
+      "epoch": 0.1362,
+      "grad_norm": 1.1818480902296788,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 13620
+    },
+    {
+      "epoch": 0.13621,
+      "grad_norm": 1.1745157716112689,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 13621
+    },
+    {
+      "epoch": 0.13622,
+      "grad_norm": 1.1066332563589678,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 13622
+    },
+    {
+      "epoch": 0.13623,
+      "grad_norm": 1.3645281692577584,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 13623
+    },
+    {
+      "epoch": 0.13624,
+      "grad_norm": 1.1929185751443674,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 13624
+    },
+    {
+      "epoch": 0.13625,
+      "grad_norm": 1.4248611420754493,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 13625
+    },
+    {
+      "epoch": 0.13626,
+      "grad_norm": 1.186299479007345,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 13626
+    },
+    {
+      "epoch": 0.13627,
+      "grad_norm": 1.0788810221878606,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 13627
+    },
+    {
+      "epoch": 0.13628,
+      "grad_norm": 1.2977698017293953,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 13628
+    },
+    {
+      "epoch": 0.13629,
+      "grad_norm": 1.07972580227147,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 13629
+    },
+    {
+      "epoch": 0.1363,
+      "grad_norm": 1.1688938042282193,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 13630
+    },
+    {
+      "epoch": 0.13631,
+      "grad_norm": 1.3352889662898282,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 13631
+    },
+    {
+      "epoch": 0.13632,
+      "grad_norm": 1.1365058041036933,
+      "learning_rate": 0.003,
+      "loss": 4.0443,
+      "step": 13632
+    },
+    {
+      "epoch": 0.13633,
+      "grad_norm": 1.315315923471128,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 13633
+    },
+    {
+      "epoch": 0.13634,
+      "grad_norm": 1.0931804813692574,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 13634
+    },
+    {
+      "epoch": 0.13635,
+      "grad_norm": 1.264910695283981,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 13635
+    },
+    {
+      "epoch": 0.13636,
+      "grad_norm": 1.2708625560327722,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 13636
+    },
+    {
+      "epoch": 0.13637,
+      "grad_norm": 0.9990499280248041,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 13637
+    },
+    {
+      "epoch": 0.13638,
+      "grad_norm": 1.2961716625668893,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 13638
+    },
+    {
+      "epoch": 0.13639,
+      "grad_norm": 1.1163179620633081,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 13639
+    },
+    {
+      "epoch": 0.1364,
+      "grad_norm": 1.1076994011515442,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 13640
+    },
+    {
+      "epoch": 0.13641,
+      "grad_norm": 1.1762842182555493,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 13641
+    },
+    {
+      "epoch": 0.13642,
+      "grad_norm": 1.1464394748013567,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 13642
+    },
+    {
+      "epoch": 0.13643,
+      "grad_norm": 1.2598077685915399,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 13643
+    },
+    {
+      "epoch": 0.13644,
+      "grad_norm": 1.060741487532991,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 13644
+    },
+    {
+      "epoch": 0.13645,
+      "grad_norm": 1.377870157370045,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 13645
+    },
+    {
+      "epoch": 0.13646,
+      "grad_norm": 1.1109566861432008,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 13646
+    },
+    {
+      "epoch": 0.13647,
+      "grad_norm": 1.3433279023666778,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 13647
+    },
+    {
+      "epoch": 0.13648,
+      "grad_norm": 0.9491736753233733,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 13648
+    },
+    {
+      "epoch": 0.13649,
+      "grad_norm": 1.5014629142589342,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 13649
+    },
+    {
+      "epoch": 0.1365,
+      "grad_norm": 1.2241394783719237,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 13650
+    },
+    {
+      "epoch": 0.13651,
+      "grad_norm": 1.4139447872011477,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 13651
+    },
+    {
+      "epoch": 0.13652,
+      "grad_norm": 0.996084137685544,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 13652
+    },
+    {
+      "epoch": 0.13653,
+      "grad_norm": 1.0902948797409415,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 13653
+    },
+    {
+      "epoch": 0.13654,
+      "grad_norm": 1.3141932106860696,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 13654
+    },
+    {
+      "epoch": 0.13655,
+      "grad_norm": 1.1740235304999693,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 13655
+    },
+    {
+      "epoch": 0.13656,
+      "grad_norm": 1.2292046645803103,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 13656
+    },
+    {
+      "epoch": 0.13657,
+      "grad_norm": 1.0944277948772945,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 13657
+    },
+    {
+      "epoch": 0.13658,
+      "grad_norm": 1.212064236516267,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 13658
+    },
+    {
+      "epoch": 0.13659,
+      "grad_norm": 1.2888223611027119,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 13659
+    },
+    {
+      "epoch": 0.1366,
+      "grad_norm": 1.2746851628935965,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 13660
+    },
+    {
+      "epoch": 0.13661,
+      "grad_norm": 1.4234278610089575,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 13661
+    },
+    {
+      "epoch": 0.13662,
+      "grad_norm": 1.2116319288225803,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 13662
+    },
+    {
+      "epoch": 0.13663,
+      "grad_norm": 1.328145092229361,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 13663
+    },
+    {
+      "epoch": 0.13664,
+      "grad_norm": 0.9959449839391749,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 13664
+    },
+    {
+      "epoch": 0.13665,
+      "grad_norm": 1.2867223826779501,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 13665
+    },
+    {
+      "epoch": 0.13666,
+      "grad_norm": 0.9853202985653796,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 13666
+    },
+    {
+      "epoch": 0.13667,
+      "grad_norm": 1.3798038152578214,
+      "learning_rate": 0.003,
+      "loss": 4.0448,
+      "step": 13667
+    },
+    {
+      "epoch": 0.13668,
+      "grad_norm": 1.2428031701336075,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 13668
+    },
+    {
+      "epoch": 0.13669,
+      "grad_norm": 1.0698514700923212,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 13669
+    },
+    {
+      "epoch": 0.1367,
+      "grad_norm": 1.3737478426089011,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 13670
+    },
+    {
+      "epoch": 0.13671,
+      "grad_norm": 1.122164008438741,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 13671
+    },
+    {
+      "epoch": 0.13672,
+      "grad_norm": 1.5307147958375853,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 13672
+    },
+    {
+      "epoch": 0.13673,
+      "grad_norm": 0.899868551945437,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 13673
+    },
+    {
+      "epoch": 0.13674,
+      "grad_norm": 1.2097351095429887,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 13674
+    },
+    {
+      "epoch": 0.13675,
+      "grad_norm": 1.149552133151396,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 13675
+    },
+    {
+      "epoch": 0.13676,
+      "grad_norm": 1.5380813479804742,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 13676
+    },
+    {
+      "epoch": 0.13677,
+      "grad_norm": 1.1197435050673568,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 13677
+    },
+    {
+      "epoch": 0.13678,
+      "grad_norm": 1.2370751158278261,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 13678
+    },
+    {
+      "epoch": 0.13679,
+      "grad_norm": 1.37577427018495,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 13679
+    },
+    {
+      "epoch": 0.1368,
+      "grad_norm": 1.2362491627572139,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 13680
+    },
+    {
+      "epoch": 0.13681,
+      "grad_norm": 1.133991783462781,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 13681
+    },
+    {
+      "epoch": 0.13682,
+      "grad_norm": 1.3599758993204916,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 13682
+    },
+    {
+      "epoch": 0.13683,
+      "grad_norm": 1.0764047723814174,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 13683
+    },
+    {
+      "epoch": 0.13684,
+      "grad_norm": 1.2192253420881678,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 13684
+    },
+    {
+      "epoch": 0.13685,
+      "grad_norm": 1.082565952772552,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 13685
+    },
+    {
+      "epoch": 0.13686,
+      "grad_norm": 1.1530607333400629,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 13686
+    },
+    {
+      "epoch": 0.13687,
+      "grad_norm": 1.1444553141251446,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 13687
+    },
+    {
+      "epoch": 0.13688,
+      "grad_norm": 1.1769917803189258,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 13688
+    },
+    {
+      "epoch": 0.13689,
+      "grad_norm": 1.054751783887151,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 13689
+    },
+    {
+      "epoch": 0.1369,
+      "grad_norm": 1.5601870027824791,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 13690
+    },
+    {
+      "epoch": 0.13691,
+      "grad_norm": 1.093268599921564,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 13691
+    },
+    {
+      "epoch": 0.13692,
+      "grad_norm": 1.1917534985211347,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 13692
+    },
+    {
+      "epoch": 0.13693,
+      "grad_norm": 1.2994734700544726,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 13693
+    },
+    {
+      "epoch": 0.13694,
+      "grad_norm": 1.1547766676263558,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 13694
+    },
+    {
+      "epoch": 0.13695,
+      "grad_norm": 1.1635361967495015,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 13695
+    },
+    {
+      "epoch": 0.13696,
+      "grad_norm": 1.2507602619889238,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 13696
+    },
+    {
+      "epoch": 0.13697,
+      "grad_norm": 0.9315906290639528,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 13697
+    },
+    {
+      "epoch": 0.13698,
+      "grad_norm": 1.1915880289336969,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 13698
+    },
+    {
+      "epoch": 0.13699,
+      "grad_norm": 1.2447972729649426,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 13699
+    },
+    {
+      "epoch": 0.137,
+      "grad_norm": 1.115048622483683,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 13700
+    },
+    {
+      "epoch": 0.13701,
+      "grad_norm": 1.1805289485345356,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 13701
+    },
+    {
+      "epoch": 0.13702,
+      "grad_norm": 1.0326553481841696,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 13702
+    },
+    {
+      "epoch": 0.13703,
+      "grad_norm": 1.1744583181917279,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 13703
+    },
+    {
+      "epoch": 0.13704,
+      "grad_norm": 1.1175079166697401,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 13704
+    },
+    {
+      "epoch": 0.13705,
+      "grad_norm": 1.224709949818222,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 13705
+    },
+    {
+      "epoch": 0.13706,
+      "grad_norm": 1.36848019240307,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 13706
+    },
+    {
+      "epoch": 0.13707,
+      "grad_norm": 1.1390307172209935,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 13707
+    },
+    {
+      "epoch": 0.13708,
+      "grad_norm": 1.090064422114458,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 13708
+    },
+    {
+      "epoch": 0.13709,
+      "grad_norm": 1.2237984657350607,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 13709
+    },
+    {
+      "epoch": 0.1371,
+      "grad_norm": 1.168599884032333,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 13710
+    },
+    {
+      "epoch": 0.13711,
+      "grad_norm": 1.6158106815844115,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 13711
+    },
+    {
+      "epoch": 0.13712,
+      "grad_norm": 0.9296745648114974,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 13712
+    },
+    {
+      "epoch": 0.13713,
+      "grad_norm": 1.184943380430527,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 13713
+    },
+    {
+      "epoch": 0.13714,
+      "grad_norm": 1.2657594217766623,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 13714
+    },
+    {
+      "epoch": 0.13715,
+      "grad_norm": 1.2599122393116169,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 13715
+    },
+    {
+      "epoch": 0.13716,
+      "grad_norm": 1.1314866178955236,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 13716
+    },
+    {
+      "epoch": 0.13717,
+      "grad_norm": 1.1669368270819687,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 13717
+    },
+    {
+      "epoch": 0.13718,
+      "grad_norm": 1.3826944955399298,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 13718
+    },
+    {
+      "epoch": 0.13719,
+      "grad_norm": 1.2228088691409713,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 13719
+    },
+    {
+      "epoch": 0.1372,
+      "grad_norm": 1.1745635287793246,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 13720
+    },
+    {
+      "epoch": 0.13721,
+      "grad_norm": 1.1651313503452432,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 13721
+    },
+    {
+      "epoch": 0.13722,
+      "grad_norm": 1.1640986926150954,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 13722
+    },
+    {
+      "epoch": 0.13723,
+      "grad_norm": 1.349857413894611,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 13723
+    },
+    {
+      "epoch": 0.13724,
+      "grad_norm": 1.0418103753447379,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 13724
+    },
+    {
+      "epoch": 0.13725,
+      "grad_norm": 1.420188473153875,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 13725
+    },
+    {
+      "epoch": 0.13726,
+      "grad_norm": 1.0133290806749176,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 13726
+    },
+    {
+      "epoch": 0.13727,
+      "grad_norm": 1.425719934360195,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 13727
+    },
+    {
+      "epoch": 0.13728,
+      "grad_norm": 0.9872080493101567,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 13728
+    },
+    {
+      "epoch": 0.13729,
+      "grad_norm": 1.412401336069546,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 13729
+    },
+    {
+      "epoch": 0.1373,
+      "grad_norm": 1.1511414874159784,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 13730
+    },
+    {
+      "epoch": 0.13731,
+      "grad_norm": 1.4826903861671865,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 13731
+    },
+    {
+      "epoch": 0.13732,
+      "grad_norm": 1.0612799057595916,
+      "learning_rate": 0.003,
+      "loss": 3.9867,
+      "step": 13732
+    },
+    {
+      "epoch": 0.13733,
+      "grad_norm": 1.1130211645705217,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 13733
+    },
+    {
+      "epoch": 0.13734,
+      "grad_norm": 1.1992034429309264,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 13734
+    },
+    {
+      "epoch": 0.13735,
+      "grad_norm": 1.1869985348367593,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 13735
+    },
+    {
+      "epoch": 0.13736,
+      "grad_norm": 1.0581702676513116,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 13736
+    },
+    {
+      "epoch": 0.13737,
+      "grad_norm": 1.2556745480544078,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 13737
+    },
+    {
+      "epoch": 0.13738,
+      "grad_norm": 1.0967254187692979,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 13738
+    },
+    {
+      "epoch": 0.13739,
+      "grad_norm": 1.2665754205291968,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 13739
+    },
+    {
+      "epoch": 0.1374,
+      "grad_norm": 0.8686616512028059,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 13740
+    },
+    {
+      "epoch": 0.13741,
+      "grad_norm": 1.0704742058175276,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 13741
+    },
+    {
+      "epoch": 0.13742,
+      "grad_norm": 1.479336455824835,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 13742
+    },
+    {
+      "epoch": 0.13743,
+      "grad_norm": 1.1989083802465799,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 13743
+    },
+    {
+      "epoch": 0.13744,
+      "grad_norm": 1.2784732801027345,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 13744
+    },
+    {
+      "epoch": 0.13745,
+      "grad_norm": 1.0593265535350147,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 13745
+    },
+    {
+      "epoch": 0.13746,
+      "grad_norm": 1.3268605067423047,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 13746
+    },
+    {
+      "epoch": 0.13747,
+      "grad_norm": 1.073961143923257,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 13747
+    },
+    {
+      "epoch": 0.13748,
+      "grad_norm": 1.3522000440774327,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 13748
+    },
+    {
+      "epoch": 0.13749,
+      "grad_norm": 0.8957601239200287,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 13749
+    },
+    {
+      "epoch": 0.1375,
+      "grad_norm": 1.3417390194539345,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 13750
+    },
+    {
+      "epoch": 0.13751,
+      "grad_norm": 1.5273003202455322,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 13751
+    },
+    {
+      "epoch": 0.13752,
+      "grad_norm": 0.9674239717893509,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 13752
+    },
+    {
+      "epoch": 0.13753,
+      "grad_norm": 1.331551362321103,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 13753
+    },
+    {
+      "epoch": 0.13754,
+      "grad_norm": 0.9727047760605053,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 13754
+    },
+    {
+      "epoch": 0.13755,
+      "grad_norm": 1.3971142255392022,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 13755
+    },
+    {
+      "epoch": 0.13756,
+      "grad_norm": 1.144141970264651,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 13756
+    },
+    {
+      "epoch": 0.13757,
+      "grad_norm": 1.570052296011186,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 13757
+    },
+    {
+      "epoch": 0.13758,
+      "grad_norm": 1.2885569367110294,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 13758
+    },
+    {
+      "epoch": 0.13759,
+      "grad_norm": 1.2012167993907945,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 13759
+    },
+    {
+      "epoch": 0.1376,
+      "grad_norm": 1.1971123342918735,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 13760
+    },
+    {
+      "epoch": 0.13761,
+      "grad_norm": 1.2711990626473382,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 13761
+    },
+    {
+      "epoch": 0.13762,
+      "grad_norm": 1.2563892597273907,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 13762
+    },
+    {
+      "epoch": 0.13763,
+      "grad_norm": 1.3999239237162613,
+      "learning_rate": 0.003,
+      "loss": 4.0592,
+      "step": 13763
+    },
+    {
+      "epoch": 0.13764,
+      "grad_norm": 0.9286033918747508,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 13764
+    },
+    {
+      "epoch": 0.13765,
+      "grad_norm": 1.229986753069097,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 13765
+    },
+    {
+      "epoch": 0.13766,
+      "grad_norm": 1.2910295749295635,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 13766
+    },
+    {
+      "epoch": 0.13767,
+      "grad_norm": 1.2404265094621845,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 13767
+    },
+    {
+      "epoch": 0.13768,
+      "grad_norm": 1.380749421877674,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 13768
+    },
+    {
+      "epoch": 0.13769,
+      "grad_norm": 1.055616536431569,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 13769
+    },
+    {
+      "epoch": 0.1377,
+      "grad_norm": 1.261496186434932,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 13770
+    },
+    {
+      "epoch": 0.13771,
+      "grad_norm": 1.078420340221653,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 13771
+    },
+    {
+      "epoch": 0.13772,
+      "grad_norm": 1.1886135398725044,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 13772
+    },
+    {
+      "epoch": 0.13773,
+      "grad_norm": 1.1260744620890015,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 13773
+    },
+    {
+      "epoch": 0.13774,
+      "grad_norm": 1.3720426378978656,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 13774
+    },
+    {
+      "epoch": 0.13775,
+      "grad_norm": 1.1216764323554513,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 13775
+    },
+    {
+      "epoch": 0.13776,
+      "grad_norm": 1.393869507420362,
+      "learning_rate": 0.003,
+      "loss": 3.9686,
+      "step": 13776
+    },
+    {
+      "epoch": 0.13777,
+      "grad_norm": 1.2412187589578358,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 13777
+    },
+    {
+      "epoch": 0.13778,
+      "grad_norm": 1.3400166349526852,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 13778
+    },
+    {
+      "epoch": 0.13779,
+      "grad_norm": 1.1264589087670016,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 13779
+    },
+    {
+      "epoch": 0.1378,
+      "grad_norm": 1.4068617953668512,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 13780
+    },
+    {
+      "epoch": 0.13781,
+      "grad_norm": 1.163379495883538,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 13781
+    },
+    {
+      "epoch": 0.13782,
+      "grad_norm": 1.0635912857489516,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 13782
+    },
+    {
+      "epoch": 0.13783,
+      "grad_norm": 1.295939159374199,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 13783
+    },
+    {
+      "epoch": 0.13784,
+      "grad_norm": 1.0123106600447564,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 13784
+    },
+    {
+      "epoch": 0.13785,
+      "grad_norm": 1.1793198715769542,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 13785
+    },
+    {
+      "epoch": 0.13786,
+      "grad_norm": 1.3681108391101406,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 13786
+    },
+    {
+      "epoch": 0.13787,
+      "grad_norm": 1.0806468901634552,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 13787
+    },
+    {
+      "epoch": 0.13788,
+      "grad_norm": 1.4124903568965343,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 13788
+    },
+    {
+      "epoch": 0.13789,
+      "grad_norm": 0.9941383912137073,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 13789
+    },
+    {
+      "epoch": 0.1379,
+      "grad_norm": 1.4690114949842519,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 13790
+    },
+    {
+      "epoch": 0.13791,
+      "grad_norm": 0.9399050464965364,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 13791
+    },
+    {
+      "epoch": 0.13792,
+      "grad_norm": 1.0929317392269118,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 13792
+    },
+    {
+      "epoch": 0.13793,
+      "grad_norm": 1.1524550818415649,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 13793
+    },
+    {
+      "epoch": 0.13794,
+      "grad_norm": 1.4067300730540004,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 13794
+    },
+    {
+      "epoch": 0.13795,
+      "grad_norm": 0.8852232954524153,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 13795
+    },
+    {
+      "epoch": 0.13796,
+      "grad_norm": 1.34857853723551,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 13796
+    },
+    {
+      "epoch": 0.13797,
+      "grad_norm": 1.417437703995536,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 13797
+    },
+    {
+      "epoch": 0.13798,
+      "grad_norm": 1.2875740904930375,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 13798
+    },
+    {
+      "epoch": 0.13799,
+      "grad_norm": 1.1346468457962757,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 13799
+    },
+    {
+      "epoch": 0.138,
+      "grad_norm": 1.1202318261674071,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 13800
+    },
+    {
+      "epoch": 0.13801,
+      "grad_norm": 1.2250164759666067,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 13801
+    },
+    {
+      "epoch": 0.13802,
+      "grad_norm": 1.273665428202938,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 13802
+    },
+    {
+      "epoch": 0.13803,
+      "grad_norm": 1.0505705506231746,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 13803
+    },
+    {
+      "epoch": 0.13804,
+      "grad_norm": 1.396502131917135,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 13804
+    },
+    {
+      "epoch": 0.13805,
+      "grad_norm": 1.040820881985539,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 13805
+    },
+    {
+      "epoch": 0.13806,
+      "grad_norm": 1.3201950992023033,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 13806
+    },
+    {
+      "epoch": 0.13807,
+      "grad_norm": 1.0861147573091232,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 13807
+    },
+    {
+      "epoch": 0.13808,
+      "grad_norm": 1.2454784265931447,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 13808
+    },
+    {
+      "epoch": 0.13809,
+      "grad_norm": 1.0460082186264419,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 13809
+    },
+    {
+      "epoch": 0.1381,
+      "grad_norm": 1.4013112005182495,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 13810
+    },
+    {
+      "epoch": 0.13811,
+      "grad_norm": 1.4702898465350844,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 13811
+    },
+    {
+      "epoch": 0.13812,
+      "grad_norm": 1.1119162188653258,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 13812
+    },
+    {
+      "epoch": 0.13813,
+      "grad_norm": 1.3278272093657792,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 13813
+    },
+    {
+      "epoch": 0.13814,
+      "grad_norm": 1.2921164651858392,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 13814
+    },
+    {
+      "epoch": 0.13815,
+      "grad_norm": 1.070492395176577,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 13815
+    },
+    {
+      "epoch": 0.13816,
+      "grad_norm": 1.1857974334956447,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 13816
+    },
+    {
+      "epoch": 0.13817,
+      "grad_norm": 1.2491079518196415,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 13817
+    },
+    {
+      "epoch": 0.13818,
+      "grad_norm": 1.0503690419199725,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 13818
+    },
+    {
+      "epoch": 0.13819,
+      "grad_norm": 1.213579214314059,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 13819
+    },
+    {
+      "epoch": 0.1382,
+      "grad_norm": 1.2256209285990405,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 13820
+    },
+    {
+      "epoch": 0.13821,
+      "grad_norm": 1.1823879754528905,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 13821
+    },
+    {
+      "epoch": 0.13822,
+      "grad_norm": 1.1448906824952305,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 13822
+    },
+    {
+      "epoch": 0.13823,
+      "grad_norm": 1.472628687368256,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 13823
+    },
+    {
+      "epoch": 0.13824,
+      "grad_norm": 1.0233371215440514,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 13824
+    },
+    {
+      "epoch": 0.13825,
+      "grad_norm": 1.4748736420222301,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 13825
+    },
+    {
+      "epoch": 0.13826,
+      "grad_norm": 1.134890953235084,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 13826
+    },
+    {
+      "epoch": 0.13827,
+      "grad_norm": 1.1542926288725004,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 13827
+    },
+    {
+      "epoch": 0.13828,
+      "grad_norm": 1.0772783402004467,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 13828
+    },
+    {
+      "epoch": 0.13829,
+      "grad_norm": 1.398030801311642,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 13829
+    },
+    {
+      "epoch": 0.1383,
+      "grad_norm": 1.3276117884935643,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 13830
+    },
+    {
+      "epoch": 0.13831,
+      "grad_norm": 0.9571093429384878,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 13831
+    },
+    {
+      "epoch": 0.13832,
+      "grad_norm": 1.4892711039285325,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 13832
+    },
+    {
+      "epoch": 0.13833,
+      "grad_norm": 1.150716669767256,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 13833
+    },
+    {
+      "epoch": 0.13834,
+      "grad_norm": 1.0530121395710959,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 13834
+    },
+    {
+      "epoch": 0.13835,
+      "grad_norm": 1.1494600804013582,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 13835
+    },
+    {
+      "epoch": 0.13836,
+      "grad_norm": 1.2083708079434357,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 13836
+    },
+    {
+      "epoch": 0.13837,
+      "grad_norm": 1.1715556568975776,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 13837
+    },
+    {
+      "epoch": 0.13838,
+      "grad_norm": 1.0801918163617674,
+      "learning_rate": 0.003,
+      "loss": 3.9786,
+      "step": 13838
+    },
+    {
+      "epoch": 0.13839,
+      "grad_norm": 1.1347098154169728,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 13839
+    },
+    {
+      "epoch": 0.1384,
+      "grad_norm": 1.3958779067058595,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 13840
+    },
+    {
+      "epoch": 0.13841,
+      "grad_norm": 1.006181864114487,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 13841
+    },
+    {
+      "epoch": 0.13842,
+      "grad_norm": 1.272894091173234,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 13842
+    },
+    {
+      "epoch": 0.13843,
+      "grad_norm": 1.2013856009795578,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 13843
+    },
+    {
+      "epoch": 0.13844,
+      "grad_norm": 1.1246714118232903,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 13844
+    },
+    {
+      "epoch": 0.13845,
+      "grad_norm": 1.322509334891701,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 13845
+    },
+    {
+      "epoch": 0.13846,
+      "grad_norm": 1.1521055168429188,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 13846
+    },
+    {
+      "epoch": 0.13847,
+      "grad_norm": 1.2478833693518745,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 13847
+    },
+    {
+      "epoch": 0.13848,
+      "grad_norm": 1.3016325633154966,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 13848
+    },
+    {
+      "epoch": 0.13849,
+      "grad_norm": 1.2280057760265102,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 13849
+    },
+    {
+      "epoch": 0.1385,
+      "grad_norm": 1.0772662000472513,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 13850
+    },
+    {
+      "epoch": 0.13851,
+      "grad_norm": 1.3768658155376017,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 13851
+    },
+    {
+      "epoch": 0.13852,
+      "grad_norm": 1.010429514973437,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 13852
+    },
+    {
+      "epoch": 0.13853,
+      "grad_norm": 1.3832349644979869,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 13853
+    },
+    {
+      "epoch": 0.13854,
+      "grad_norm": 1.0641415255798539,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 13854
+    },
+    {
+      "epoch": 0.13855,
+      "grad_norm": 1.4794813909654045,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 13855
+    },
+    {
+      "epoch": 0.13856,
+      "grad_norm": 1.1183826799442813,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 13856
+    },
+    {
+      "epoch": 0.13857,
+      "grad_norm": 1.23748079145574,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 13857
+    },
+    {
+      "epoch": 0.13858,
+      "grad_norm": 1.114360873205981,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 13858
+    },
+    {
+      "epoch": 0.13859,
+      "grad_norm": 1.2406829264267383,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 13859
+    },
+    {
+      "epoch": 0.1386,
+      "grad_norm": 1.1466333335232666,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 13860
+    },
+    {
+      "epoch": 0.13861,
+      "grad_norm": 1.301601988392719,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 13861
+    },
+    {
+      "epoch": 0.13862,
+      "grad_norm": 1.0949167558716029,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 13862
+    },
+    {
+      "epoch": 0.13863,
+      "grad_norm": 0.9560402898539208,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 13863
+    },
+    {
+      "epoch": 0.13864,
+      "grad_norm": 1.2825507857461917,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 13864
+    },
+    {
+      "epoch": 0.13865,
+      "grad_norm": 1.178410218571524,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 13865
+    },
+    {
+      "epoch": 0.13866,
+      "grad_norm": 1.174055726164993,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 13866
+    },
+    {
+      "epoch": 0.13867,
+      "grad_norm": 1.4536410253305663,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 13867
+    },
+    {
+      "epoch": 0.13868,
+      "grad_norm": 0.9062474618794782,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 13868
+    },
+    {
+      "epoch": 0.13869,
+      "grad_norm": 1.0994109586504763,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 13869
+    },
+    {
+      "epoch": 0.1387,
+      "grad_norm": 1.1734072957099282,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 13870
+    },
+    {
+      "epoch": 0.13871,
+      "grad_norm": 1.3104684470724604,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 13871
+    },
+    {
+      "epoch": 0.13872,
+      "grad_norm": 1.0379320482503833,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 13872
+    },
+    {
+      "epoch": 0.13873,
+      "grad_norm": 1.293243056137157,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 13873
+    },
+    {
+      "epoch": 0.13874,
+      "grad_norm": 1.0469175605557832,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 13874
+    },
+    {
+      "epoch": 0.13875,
+      "grad_norm": 1.0299822183811806,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 13875
+    },
+    {
+      "epoch": 0.13876,
+      "grad_norm": 1.2130323863458037,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 13876
+    },
+    {
+      "epoch": 0.13877,
+      "grad_norm": 1.1859891666047682,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 13877
+    },
+    {
+      "epoch": 0.13878,
+      "grad_norm": 1.2489914141355798,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 13878
+    },
+    {
+      "epoch": 0.13879,
+      "grad_norm": 1.243883938349891,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 13879
+    },
+    {
+      "epoch": 0.1388,
+      "grad_norm": 1.2098306819415796,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 13880
+    },
+    {
+      "epoch": 0.13881,
+      "grad_norm": 1.1157535130031828,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 13881
+    },
+    {
+      "epoch": 0.13882,
+      "grad_norm": 1.309752718257075,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 13882
+    },
+    {
+      "epoch": 0.13883,
+      "grad_norm": 1.1834101954134257,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 13883
+    },
+    {
+      "epoch": 0.13884,
+      "grad_norm": 1.3622147587914692,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 13884
+    },
+    {
+      "epoch": 0.13885,
+      "grad_norm": 1.0817660002621556,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 13885
+    },
+    {
+      "epoch": 0.13886,
+      "grad_norm": 1.220724330378295,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 13886
+    },
+    {
+      "epoch": 0.13887,
+      "grad_norm": 1.0857253756268257,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 13887
+    },
+    {
+      "epoch": 0.13888,
+      "grad_norm": 1.2881188044988716,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 13888
+    },
+    {
+      "epoch": 0.13889,
+      "grad_norm": 1.3277501652331098,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 13889
+    },
+    {
+      "epoch": 0.1389,
+      "grad_norm": 1.2847936050254294,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 13890
+    },
+    {
+      "epoch": 0.13891,
+      "grad_norm": 1.10509747370125,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 13891
+    },
+    {
+      "epoch": 0.13892,
+      "grad_norm": 1.3355982750193622,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 13892
+    },
+    {
+      "epoch": 0.13893,
+      "grad_norm": 1.199772978665531,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 13893
+    },
+    {
+      "epoch": 0.13894,
+      "grad_norm": 1.1919544025083375,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 13894
+    },
+    {
+      "epoch": 0.13895,
+      "grad_norm": 1.1791968684172016,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 13895
+    },
+    {
+      "epoch": 0.13896,
+      "grad_norm": 1.045813585726014,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 13896
+    },
+    {
+      "epoch": 0.13897,
+      "grad_norm": 1.464421185361696,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 13897
+    },
+    {
+      "epoch": 0.13898,
+      "grad_norm": 0.9830880963597627,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 13898
+    },
+    {
+      "epoch": 0.13899,
+      "grad_norm": 1.4356757886313305,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 13899
+    },
+    {
+      "epoch": 0.139,
+      "grad_norm": 1.3762264672777118,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 13900
+    },
+    {
+      "epoch": 0.13901,
+      "grad_norm": 1.4262850064756332,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 13901
+    },
+    {
+      "epoch": 0.13902,
+      "grad_norm": 1.1067642729379954,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 13902
+    },
+    {
+      "epoch": 0.13903,
+      "grad_norm": 1.2662437461045917,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 13903
+    },
+    {
+      "epoch": 0.13904,
+      "grad_norm": 1.0311275415899397,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 13904
+    },
+    {
+      "epoch": 0.13905,
+      "grad_norm": 1.1936049630810572,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 13905
+    },
+    {
+      "epoch": 0.13906,
+      "grad_norm": 1.052760904049721,
+      "learning_rate": 0.003,
+      "loss": 3.9777,
+      "step": 13906
+    },
+    {
+      "epoch": 0.13907,
+      "grad_norm": 1.3029101585399128,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 13907
+    },
+    {
+      "epoch": 0.13908,
+      "grad_norm": 1.2335774415370064,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 13908
+    },
+    {
+      "epoch": 0.13909,
+      "grad_norm": 1.3275677632729739,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 13909
+    },
+    {
+      "epoch": 0.1391,
+      "grad_norm": 1.1297632688079993,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 13910
+    },
+    {
+      "epoch": 0.13911,
+      "grad_norm": 1.2722418665779391,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 13911
+    },
+    {
+      "epoch": 0.13912,
+      "grad_norm": 1.0805464020930946,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 13912
+    },
+    {
+      "epoch": 0.13913,
+      "grad_norm": 1.340313135441908,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 13913
+    },
+    {
+      "epoch": 0.13914,
+      "grad_norm": 0.9922681051920051,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 13914
+    },
+    {
+      "epoch": 0.13915,
+      "grad_norm": 1.3705530383268818,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 13915
+    },
+    {
+      "epoch": 0.13916,
+      "grad_norm": 0.9267537104518053,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 13916
+    },
+    {
+      "epoch": 0.13917,
+      "grad_norm": 1.1780767482538883,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 13917
+    },
+    {
+      "epoch": 0.13918,
+      "grad_norm": 1.294385111083149,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 13918
+    },
+    {
+      "epoch": 0.13919,
+      "grad_norm": 0.9510832470643007,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 13919
+    },
+    {
+      "epoch": 0.1392,
+      "grad_norm": 1.1967730551592841,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 13920
+    },
+    {
+      "epoch": 0.13921,
+      "grad_norm": 1.241002135891913,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 13921
+    },
+    {
+      "epoch": 0.13922,
+      "grad_norm": 1.0521373234826998,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 13922
+    },
+    {
+      "epoch": 0.13923,
+      "grad_norm": 1.3016596425010858,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 13923
+    },
+    {
+      "epoch": 0.13924,
+      "grad_norm": 1.3241790790815522,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 13924
+    },
+    {
+      "epoch": 0.13925,
+      "grad_norm": 1.2397807904648384,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 13925
+    },
+    {
+      "epoch": 0.13926,
+      "grad_norm": 1.5988946043532852,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 13926
+    },
+    {
+      "epoch": 0.13927,
+      "grad_norm": 1.043076253026414,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 13927
+    },
+    {
+      "epoch": 0.13928,
+      "grad_norm": 1.3983772820072078,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 13928
+    },
+    {
+      "epoch": 0.13929,
+      "grad_norm": 1.3822214057025675,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 13929
+    },
+    {
+      "epoch": 0.1393,
+      "grad_norm": 1.0932817291788222,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 13930
+    },
+    {
+      "epoch": 0.13931,
+      "grad_norm": 1.2862918829184873,
+      "learning_rate": 0.003,
+      "loss": 3.9558,
+      "step": 13931
+    },
+    {
+      "epoch": 0.13932,
+      "grad_norm": 1.2821414397282036,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 13932
+    },
+    {
+      "epoch": 0.13933,
+      "grad_norm": 1.2089220976008554,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 13933
+    },
+    {
+      "epoch": 0.13934,
+      "grad_norm": 1.2285477929917308,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 13934
+    },
+    {
+      "epoch": 0.13935,
+      "grad_norm": 1.0331335294069954,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 13935
+    },
+    {
+      "epoch": 0.13936,
+      "grad_norm": 1.228393740053751,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 13936
+    },
+    {
+      "epoch": 0.13937,
+      "grad_norm": 1.2194970798916072,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 13937
+    },
+    {
+      "epoch": 0.13938,
+      "grad_norm": 1.1371455385108602,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 13938
+    },
+    {
+      "epoch": 0.13939,
+      "grad_norm": 1.1464202944234065,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 13939
+    },
+    {
+      "epoch": 0.1394,
+      "grad_norm": 1.209782506620369,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 13940
+    },
+    {
+      "epoch": 0.13941,
+      "grad_norm": 1.2437706017362091,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 13941
+    },
+    {
+      "epoch": 0.13942,
+      "grad_norm": 0.9606867500748588,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 13942
+    },
+    {
+      "epoch": 0.13943,
+      "grad_norm": 1.330782043238364,
+      "learning_rate": 0.003,
+      "loss": 4.0664,
+      "step": 13943
+    },
+    {
+      "epoch": 0.13944,
+      "grad_norm": 1.0944235490095693,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 13944
+    },
+    {
+      "epoch": 0.13945,
+      "grad_norm": 1.3837713427805216,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 13945
+    },
+    {
+      "epoch": 0.13946,
+      "grad_norm": 1.1283957430188176,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 13946
+    },
+    {
+      "epoch": 0.13947,
+      "grad_norm": 1.3073033270119225,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 13947
+    },
+    {
+      "epoch": 0.13948,
+      "grad_norm": 1.0356700644168169,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 13948
+    },
+    {
+      "epoch": 0.13949,
+      "grad_norm": 1.2827850931174634,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 13949
+    },
+    {
+      "epoch": 0.1395,
+      "grad_norm": 0.9681245671456082,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 13950
+    },
+    {
+      "epoch": 0.13951,
+      "grad_norm": 1.6241769253303424,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 13951
+    },
+    {
+      "epoch": 0.13952,
+      "grad_norm": 1.0930959372215059,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 13952
+    },
+    {
+      "epoch": 0.13953,
+      "grad_norm": 1.2948262234227748,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 13953
+    },
+    {
+      "epoch": 0.13954,
+      "grad_norm": 1.0006578557828247,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 13954
+    },
+    {
+      "epoch": 0.13955,
+      "grad_norm": 1.4064078111835758,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 13955
+    },
+    {
+      "epoch": 0.13956,
+      "grad_norm": 1.086412035581726,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 13956
+    },
+    {
+      "epoch": 0.13957,
+      "grad_norm": 1.4309866455033866,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 13957
+    },
+    {
+      "epoch": 0.13958,
+      "grad_norm": 0.9741802566465895,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 13958
+    },
+    {
+      "epoch": 0.13959,
+      "grad_norm": 1.403639528165463,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 13959
+    },
+    {
+      "epoch": 0.1396,
+      "grad_norm": 1.2468214735098933,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 13960
+    },
+    {
+      "epoch": 0.13961,
+      "grad_norm": 1.2613812556591117,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 13961
+    },
+    {
+      "epoch": 0.13962,
+      "grad_norm": 1.1023642989666198,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 13962
+    },
+    {
+      "epoch": 0.13963,
+      "grad_norm": 1.2135199740752762,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 13963
+    },
+    {
+      "epoch": 0.13964,
+      "grad_norm": 1.0943237528793914,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 13964
+    },
+    {
+      "epoch": 0.13965,
+      "grad_norm": 1.3984602769716774,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 13965
+    },
+    {
+      "epoch": 0.13966,
+      "grad_norm": 1.1833554431013096,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 13966
+    },
+    {
+      "epoch": 0.13967,
+      "grad_norm": 1.311230921590294,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 13967
+    },
+    {
+      "epoch": 0.13968,
+      "grad_norm": 0.997931625907764,
+      "learning_rate": 0.003,
+      "loss": 3.9795,
+      "step": 13968
+    },
+    {
+      "epoch": 0.13969,
+      "grad_norm": 1.6613948429954382,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 13969
+    },
+    {
+      "epoch": 0.1397,
+      "grad_norm": 1.1706001548186498,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 13970
+    },
+    {
+      "epoch": 0.13971,
+      "grad_norm": 1.3455729323827497,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 13971
+    },
+    {
+      "epoch": 0.13972,
+      "grad_norm": 1.0071348841464436,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 13972
+    },
+    {
+      "epoch": 0.13973,
+      "grad_norm": 1.382779050748766,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 13973
+    },
+    {
+      "epoch": 0.13974,
+      "grad_norm": 1.22731538379118,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 13974
+    },
+    {
+      "epoch": 0.13975,
+      "grad_norm": 1.381444376592699,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 13975
+    },
+    {
+      "epoch": 0.13976,
+      "grad_norm": 1.334391128566007,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 13976
+    },
+    {
+      "epoch": 0.13977,
+      "grad_norm": 1.112083982151318,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 13977
+    },
+    {
+      "epoch": 0.13978,
+      "grad_norm": 1.0883189299965499,
+      "learning_rate": 0.003,
+      "loss": 4.076,
+      "step": 13978
+    },
+    {
+      "epoch": 0.13979,
+      "grad_norm": 1.3200277147595922,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 13979
+    },
+    {
+      "epoch": 0.1398,
+      "grad_norm": 1.3044645984561822,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 13980
+    },
+    {
+      "epoch": 0.13981,
+      "grad_norm": 1.168169695750215,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 13981
+    },
+    {
+      "epoch": 0.13982,
+      "grad_norm": 1.3016765422844125,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 13982
+    },
+    {
+      "epoch": 0.13983,
+      "grad_norm": 1.1304122952759246,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 13983
+    },
+    {
+      "epoch": 0.13984,
+      "grad_norm": 1.0634672602319772,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 13984
+    },
+    {
+      "epoch": 0.13985,
+      "grad_norm": 1.1666668593758922,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 13985
+    },
+    {
+      "epoch": 0.13986,
+      "grad_norm": 1.1577098649486288,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 13986
+    },
+    {
+      "epoch": 0.13987,
+      "grad_norm": 1.2543916406092464,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 13987
+    },
+    {
+      "epoch": 0.13988,
+      "grad_norm": 1.2350106643993233,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 13988
+    },
+    {
+      "epoch": 0.13989,
+      "grad_norm": 1.142275915177779,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 13989
+    },
+    {
+      "epoch": 0.1399,
+      "grad_norm": 1.2704798136615096,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 13990
+    },
+    {
+      "epoch": 0.13991,
+      "grad_norm": 0.9015601618647462,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 13991
+    },
+    {
+      "epoch": 0.13992,
+      "grad_norm": 0.9856059615225615,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 13992
+    },
+    {
+      "epoch": 0.13993,
+      "grad_norm": 1.465937483460713,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 13993
+    },
+    {
+      "epoch": 0.13994,
+      "grad_norm": 1.2082285117015399,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 13994
+    },
+    {
+      "epoch": 0.13995,
+      "grad_norm": 1.4099211609051017,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 13995
+    },
+    {
+      "epoch": 0.13996,
+      "grad_norm": 1.1942828382556108,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 13996
+    },
+    {
+      "epoch": 0.13997,
+      "grad_norm": 1.0453877453839826,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 13997
+    },
+    {
+      "epoch": 0.13998,
+      "grad_norm": 1.3971265611083508,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 13998
+    },
+    {
+      "epoch": 0.13999,
+      "grad_norm": 1.0312978267883302,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 13999
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 1.3522912522542927,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 14000
+    },
+    {
+      "epoch": 0.14001,
+      "grad_norm": 1.2606310031280883,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 14001
+    },
+    {
+      "epoch": 0.14002,
+      "grad_norm": 1.1408868714728893,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 14002
+    },
+    {
+      "epoch": 0.14003,
+      "grad_norm": 1.2117998216576167,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 14003
+    },
+    {
+      "epoch": 0.14004,
+      "grad_norm": 1.1914649510372295,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 14004
+    },
+    {
+      "epoch": 0.14005,
+      "grad_norm": 1.13158563813848,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 14005
+    },
+    {
+      "epoch": 0.14006,
+      "grad_norm": 1.1789590777846468,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 14006
+    },
+    {
+      "epoch": 0.14007,
+      "grad_norm": 1.2482102753530098,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 14007
+    },
+    {
+      "epoch": 0.14008,
+      "grad_norm": 1.3000865301090239,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 14008
+    },
+    {
+      "epoch": 0.14009,
+      "grad_norm": 1.0317882634647473,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 14009
+    },
+    {
+      "epoch": 0.1401,
+      "grad_norm": 1.2888815577614365,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 14010
+    },
+    {
+      "epoch": 0.14011,
+      "grad_norm": 0.9507500621278664,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 14011
+    },
+    {
+      "epoch": 0.14012,
+      "grad_norm": 1.410728391771078,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 14012
+    },
+    {
+      "epoch": 0.14013,
+      "grad_norm": 1.223325656820055,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 14013
+    },
+    {
+      "epoch": 0.14014,
+      "grad_norm": 1.110814995418571,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 14014
+    },
+    {
+      "epoch": 0.14015,
+      "grad_norm": 1.0921648099988692,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 14015
+    },
+    {
+      "epoch": 0.14016,
+      "grad_norm": 1.3727424125567884,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 14016
+    },
+    {
+      "epoch": 0.14017,
+      "grad_norm": 1.2203621215869773,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 14017
+    },
+    {
+      "epoch": 0.14018,
+      "grad_norm": 1.3990427406197736,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 14018
+    },
+    {
+      "epoch": 0.14019,
+      "grad_norm": 1.2834847469903492,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 14019
+    },
+    {
+      "epoch": 0.1402,
+      "grad_norm": 1.2203999381013084,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 14020
+    },
+    {
+      "epoch": 0.14021,
+      "grad_norm": 1.1537817679896496,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 14021
+    },
+    {
+      "epoch": 0.14022,
+      "grad_norm": 1.1840094298462875,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 14022
+    },
+    {
+      "epoch": 0.14023,
+      "grad_norm": 1.2514685746782503,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 14023
+    },
+    {
+      "epoch": 0.14024,
+      "grad_norm": 1.2572710511999712,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 14024
+    },
+    {
+      "epoch": 0.14025,
+      "grad_norm": 1.1529838978875182,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 14025
+    },
+    {
+      "epoch": 0.14026,
+      "grad_norm": 1.1196849447629258,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 14026
+    },
+    {
+      "epoch": 0.14027,
+      "grad_norm": 1.4120002031767034,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 14027
+    },
+    {
+      "epoch": 0.14028,
+      "grad_norm": 1.2252743403550672,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 14028
+    },
+    {
+      "epoch": 0.14029,
+      "grad_norm": 1.0560936086888912,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 14029
+    },
+    {
+      "epoch": 0.1403,
+      "grad_norm": 1.1071915022634027,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 14030
+    },
+    {
+      "epoch": 0.14031,
+      "grad_norm": 1.1260075699825944,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 14031
+    },
+    {
+      "epoch": 0.14032,
+      "grad_norm": 1.2044444637907508,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 14032
+    },
+    {
+      "epoch": 0.14033,
+      "grad_norm": 1.0727155556561392,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 14033
+    },
+    {
+      "epoch": 0.14034,
+      "grad_norm": 1.32512443537798,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 14034
+    },
+    {
+      "epoch": 0.14035,
+      "grad_norm": 0.9462166052486559,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 14035
+    },
+    {
+      "epoch": 0.14036,
+      "grad_norm": 1.4978733792333327,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 14036
+    },
+    {
+      "epoch": 0.14037,
+      "grad_norm": 1.2740393697478154,
+      "learning_rate": 0.003,
+      "loss": 4.054,
+      "step": 14037
+    },
+    {
+      "epoch": 0.14038,
+      "grad_norm": 1.212192612661939,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 14038
+    },
+    {
+      "epoch": 0.14039,
+      "grad_norm": 1.2651133818216853,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 14039
+    },
+    {
+      "epoch": 0.1404,
+      "grad_norm": 1.159014019843782,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 14040
+    },
+    {
+      "epoch": 0.14041,
+      "grad_norm": 1.356334413419729,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 14041
+    },
+    {
+      "epoch": 0.14042,
+      "grad_norm": 1.1790065129947735,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 14042
+    },
+    {
+      "epoch": 0.14043,
+      "grad_norm": 1.3024594270807575,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 14043
+    },
+    {
+      "epoch": 0.14044,
+      "grad_norm": 1.1063212035382364,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 14044
+    },
+    {
+      "epoch": 0.14045,
+      "grad_norm": 1.358797191328306,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 14045
+    },
+    {
+      "epoch": 0.14046,
+      "grad_norm": 1.0003323125137746,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 14046
+    },
+    {
+      "epoch": 0.14047,
+      "grad_norm": 1.3248096853707327,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 14047
+    },
+    {
+      "epoch": 0.14048,
+      "grad_norm": 1.1217841806694424,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 14048
+    },
+    {
+      "epoch": 0.14049,
+      "grad_norm": 1.233918077507105,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 14049
+    },
+    {
+      "epoch": 0.1405,
+      "grad_norm": 1.1216017192561671,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 14050
+    },
+    {
+      "epoch": 0.14051,
+      "grad_norm": 1.108493840663453,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 14051
+    },
+    {
+      "epoch": 0.14052,
+      "grad_norm": 1.1763668314782794,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 14052
+    },
+    {
+      "epoch": 0.14053,
+      "grad_norm": 1.2001181755948613,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 14053
+    },
+    {
+      "epoch": 0.14054,
+      "grad_norm": 1.1838910215006067,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 14054
+    },
+    {
+      "epoch": 0.14055,
+      "grad_norm": 1.3650656646670034,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 14055
+    },
+    {
+      "epoch": 0.14056,
+      "grad_norm": 1.236392593269088,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 14056
+    },
+    {
+      "epoch": 0.14057,
+      "grad_norm": 1.3761101064022911,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 14057
+    },
+    {
+      "epoch": 0.14058,
+      "grad_norm": 0.8473911571718556,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 14058
+    },
+    {
+      "epoch": 0.14059,
+      "grad_norm": 1.1611075233336143,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 14059
+    },
+    {
+      "epoch": 0.1406,
+      "grad_norm": 1.1457197339194993,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 14060
+    },
+    {
+      "epoch": 0.14061,
+      "grad_norm": 1.1708080471302844,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 14061
+    },
+    {
+      "epoch": 0.14062,
+      "grad_norm": 1.264785307429114,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 14062
+    },
+    {
+      "epoch": 0.14063,
+      "grad_norm": 1.4168821067309096,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 14063
+    },
+    {
+      "epoch": 0.14064,
+      "grad_norm": 1.2688748501802554,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 14064
+    },
+    {
+      "epoch": 0.14065,
+      "grad_norm": 1.27489076739789,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 14065
+    },
+    {
+      "epoch": 0.14066,
+      "grad_norm": 1.2205632502308121,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 14066
+    },
+    {
+      "epoch": 0.14067,
+      "grad_norm": 1.0459763075762,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 14067
+    },
+    {
+      "epoch": 0.14068,
+      "grad_norm": 1.1345110724118248,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 14068
+    },
+    {
+      "epoch": 0.14069,
+      "grad_norm": 1.1143860163445822,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 14069
+    },
+    {
+      "epoch": 0.1407,
+      "grad_norm": 1.5410578205758152,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 14070
+    },
+    {
+      "epoch": 0.14071,
+      "grad_norm": 1.0829045851135513,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 14071
+    },
+    {
+      "epoch": 0.14072,
+      "grad_norm": 1.4511526715068985,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 14072
+    },
+    {
+      "epoch": 0.14073,
+      "grad_norm": 1.099805202133277,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 14073
+    },
+    {
+      "epoch": 0.14074,
+      "grad_norm": 1.328312415775265,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 14074
+    },
+    {
+      "epoch": 0.14075,
+      "grad_norm": 1.3574759342501346,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 14075
+    },
+    {
+      "epoch": 0.14076,
+      "grad_norm": 1.2040648367123625,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 14076
+    },
+    {
+      "epoch": 0.14077,
+      "grad_norm": 1.2356253648930262,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 14077
+    },
+    {
+      "epoch": 0.14078,
+      "grad_norm": 1.055788559715171,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 14078
+    },
+    {
+      "epoch": 0.14079,
+      "grad_norm": 1.5561946686609198,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 14079
+    },
+    {
+      "epoch": 0.1408,
+      "grad_norm": 0.8650739053143781,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 14080
+    },
+    {
+      "epoch": 0.14081,
+      "grad_norm": 1.0303599602027143,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 14081
+    },
+    {
+      "epoch": 0.14082,
+      "grad_norm": 1.267751441529696,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 14082
+    },
+    {
+      "epoch": 0.14083,
+      "grad_norm": 0.9239122241956662,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 14083
+    },
+    {
+      "epoch": 0.14084,
+      "grad_norm": 1.0079857265874606,
+      "learning_rate": 0.003,
+      "loss": 3.9749,
+      "step": 14084
+    },
+    {
+      "epoch": 0.14085,
+      "grad_norm": 1.4466701881203794,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 14085
+    },
+    {
+      "epoch": 0.14086,
+      "grad_norm": 1.1307165327501238,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 14086
+    },
+    {
+      "epoch": 0.14087,
+      "grad_norm": 1.2546536674308166,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 14087
+    },
+    {
+      "epoch": 0.14088,
+      "grad_norm": 1.1903476498333216,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 14088
+    },
+    {
+      "epoch": 0.14089,
+      "grad_norm": 1.3578091755598753,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 14089
+    },
+    {
+      "epoch": 0.1409,
+      "grad_norm": 1.268021700510964,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 14090
+    },
+    {
+      "epoch": 0.14091,
+      "grad_norm": 1.2047706355464072,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 14091
+    },
+    {
+      "epoch": 0.14092,
+      "grad_norm": 1.083127062472936,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 14092
+    },
+    {
+      "epoch": 0.14093,
+      "grad_norm": 1.418971172036768,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 14093
+    },
+    {
+      "epoch": 0.14094,
+      "grad_norm": 1.1484741827481468,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 14094
+    },
+    {
+      "epoch": 0.14095,
+      "grad_norm": 1.1705700080017016,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 14095
+    },
+    {
+      "epoch": 0.14096,
+      "grad_norm": 1.5220333740977865,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 14096
+    },
+    {
+      "epoch": 0.14097,
+      "grad_norm": 1.1069309793723154,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 14097
+    },
+    {
+      "epoch": 0.14098,
+      "grad_norm": 1.4591901998283383,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 14098
+    },
+    {
+      "epoch": 0.14099,
+      "grad_norm": 1.0732646509735886,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 14099
+    },
+    {
+      "epoch": 0.141,
+      "grad_norm": 1.2309199191771107,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 14100
+    },
+    {
+      "epoch": 0.14101,
+      "grad_norm": 1.1137438686043557,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 14101
+    },
+    {
+      "epoch": 0.14102,
+      "grad_norm": 1.3116072735141207,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 14102
+    },
+    {
+      "epoch": 0.14103,
+      "grad_norm": 1.3072196104614588,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 14103
+    },
+    {
+      "epoch": 0.14104,
+      "grad_norm": 1.0177560091617586,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 14104
+    },
+    {
+      "epoch": 0.14105,
+      "grad_norm": 1.3464912000778624,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 14105
+    },
+    {
+      "epoch": 0.14106,
+      "grad_norm": 1.0337998967026791,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 14106
+    },
+    {
+      "epoch": 0.14107,
+      "grad_norm": 1.5122607325580328,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 14107
+    },
+    {
+      "epoch": 0.14108,
+      "grad_norm": 1.2956936810229631,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 14108
+    },
+    {
+      "epoch": 0.14109,
+      "grad_norm": 1.3139702931394983,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 14109
+    },
+    {
+      "epoch": 0.1411,
+      "grad_norm": 1.0048453641489445,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 14110
+    },
+    {
+      "epoch": 0.14111,
+      "grad_norm": 1.430901945492829,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 14111
+    },
+    {
+      "epoch": 0.14112,
+      "grad_norm": 1.0367673762169347,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 14112
+    },
+    {
+      "epoch": 0.14113,
+      "grad_norm": 1.2892316542366529,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 14113
+    },
+    {
+      "epoch": 0.14114,
+      "grad_norm": 1.1071539287465246,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 14114
+    },
+    {
+      "epoch": 0.14115,
+      "grad_norm": 1.314574543696847,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 14115
+    },
+    {
+      "epoch": 0.14116,
+      "grad_norm": 1.0897407150869276,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 14116
+    },
+    {
+      "epoch": 0.14117,
+      "grad_norm": 1.1950700957763445,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 14117
+    },
+    {
+      "epoch": 0.14118,
+      "grad_norm": 1.1412532223785143,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 14118
+    },
+    {
+      "epoch": 0.14119,
+      "grad_norm": 1.444097546450158,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 14119
+    },
+    {
+      "epoch": 0.1412,
+      "grad_norm": 1.1772444722146265,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 14120
+    },
+    {
+      "epoch": 0.14121,
+      "grad_norm": 1.2618437643898595,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 14121
+    },
+    {
+      "epoch": 0.14122,
+      "grad_norm": 1.0852284065931241,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 14122
+    },
+    {
+      "epoch": 0.14123,
+      "grad_norm": 1.2536516985760147,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 14123
+    },
+    {
+      "epoch": 0.14124,
+      "grad_norm": 1.1957565476061842,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 14124
+    },
+    {
+      "epoch": 0.14125,
+      "grad_norm": 1.266299712143771,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 14125
+    },
+    {
+      "epoch": 0.14126,
+      "grad_norm": 1.1179825213214207,
+      "learning_rate": 0.003,
+      "loss": 3.9667,
+      "step": 14126
+    },
+    {
+      "epoch": 0.14127,
+      "grad_norm": 1.4270749872421038,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 14127
+    },
+    {
+      "epoch": 0.14128,
+      "grad_norm": 1.0854846432711502,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 14128
+    },
+    {
+      "epoch": 0.14129,
+      "grad_norm": 1.2293105496627963,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 14129
+    },
+    {
+      "epoch": 0.1413,
+      "grad_norm": 1.1580430778715787,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 14130
+    },
+    {
+      "epoch": 0.14131,
+      "grad_norm": 1.185949013161452,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 14131
+    },
+    {
+      "epoch": 0.14132,
+      "grad_norm": 1.1314620909176543,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 14132
+    },
+    {
+      "epoch": 0.14133,
+      "grad_norm": 1.2079450778243612,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 14133
+    },
+    {
+      "epoch": 0.14134,
+      "grad_norm": 0.9944973687931271,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 14134
+    },
+    {
+      "epoch": 0.14135,
+      "grad_norm": 1.3767494365485264,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 14135
+    },
+    {
+      "epoch": 0.14136,
+      "grad_norm": 1.0695325114096956,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 14136
+    },
+    {
+      "epoch": 0.14137,
+      "grad_norm": 1.4026688622739356,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 14137
+    },
+    {
+      "epoch": 0.14138,
+      "grad_norm": 1.1898668414843263,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 14138
+    },
+    {
+      "epoch": 0.14139,
+      "grad_norm": 1.144411276936079,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 14139
+    },
+    {
+      "epoch": 0.1414,
+      "grad_norm": 1.2266112224678802,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 14140
+    },
+    {
+      "epoch": 0.14141,
+      "grad_norm": 1.1860770803289438,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 14141
+    },
+    {
+      "epoch": 0.14142,
+      "grad_norm": 1.1660925207735835,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 14142
+    },
+    {
+      "epoch": 0.14143,
+      "grad_norm": 1.3955618961272698,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 14143
+    },
+    {
+      "epoch": 0.14144,
+      "grad_norm": 1.1286725867294176,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 14144
+    },
+    {
+      "epoch": 0.14145,
+      "grad_norm": 1.3666449864554138,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 14145
+    },
+    {
+      "epoch": 0.14146,
+      "grad_norm": 1.0899359311217849,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 14146
+    },
+    {
+      "epoch": 0.14147,
+      "grad_norm": 1.3076162503734543,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 14147
+    },
+    {
+      "epoch": 0.14148,
+      "grad_norm": 0.9456838894699942,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 14148
+    },
+    {
+      "epoch": 0.14149,
+      "grad_norm": 1.1458933750085323,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 14149
+    },
+    {
+      "epoch": 0.1415,
+      "grad_norm": 1.3026803164766048,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 14150
+    },
+    {
+      "epoch": 0.14151,
+      "grad_norm": 1.1936052509529032,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 14151
+    },
+    {
+      "epoch": 0.14152,
+      "grad_norm": 1.53961674491896,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 14152
+    },
+    {
+      "epoch": 0.14153,
+      "grad_norm": 1.0355640859393733,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 14153
+    },
+    {
+      "epoch": 0.14154,
+      "grad_norm": 1.2626192828203087,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 14154
+    },
+    {
+      "epoch": 0.14155,
+      "grad_norm": 1.364612527361059,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 14155
+    },
+    {
+      "epoch": 0.14156,
+      "grad_norm": 0.9904204490104055,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 14156
+    },
+    {
+      "epoch": 0.14157,
+      "grad_norm": 1.4606046724678607,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 14157
+    },
+    {
+      "epoch": 0.14158,
+      "grad_norm": 1.1412628005227667,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 14158
+    },
+    {
+      "epoch": 0.14159,
+      "grad_norm": 1.2566170966440469,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 14159
+    },
+    {
+      "epoch": 0.1416,
+      "grad_norm": 1.1942427192863938,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 14160
+    },
+    {
+      "epoch": 0.14161,
+      "grad_norm": 1.2747119258046609,
+      "learning_rate": 0.003,
+      "loss": 4.0614,
+      "step": 14161
+    },
+    {
+      "epoch": 0.14162,
+      "grad_norm": 1.0707115414199597,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 14162
+    },
+    {
+      "epoch": 0.14163,
+      "grad_norm": 1.0897594650738252,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 14163
+    },
+    {
+      "epoch": 0.14164,
+      "grad_norm": 1.178469957198732,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 14164
+    },
+    {
+      "epoch": 0.14165,
+      "grad_norm": 1.0783093439173759,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 14165
+    },
+    {
+      "epoch": 0.14166,
+      "grad_norm": 1.3314651178177561,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 14166
+    },
+    {
+      "epoch": 0.14167,
+      "grad_norm": 1.0843718324605727,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 14167
+    },
+    {
+      "epoch": 0.14168,
+      "grad_norm": 1.2189229582966636,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 14168
+    },
+    {
+      "epoch": 0.14169,
+      "grad_norm": 1.1528598161429937,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 14169
+    },
+    {
+      "epoch": 0.1417,
+      "grad_norm": 1.360734180095269,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 14170
+    },
+    {
+      "epoch": 0.14171,
+      "grad_norm": 1.0189899619006824,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 14171
+    },
+    {
+      "epoch": 0.14172,
+      "grad_norm": 1.2409046032040727,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 14172
+    },
+    {
+      "epoch": 0.14173,
+      "grad_norm": 1.1134978895708785,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 14173
+    },
+    {
+      "epoch": 0.14174,
+      "grad_norm": 1.428086065445783,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 14174
+    },
+    {
+      "epoch": 0.14175,
+      "grad_norm": 1.293778397312492,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 14175
+    },
+    {
+      "epoch": 0.14176,
+      "grad_norm": 1.2644397446378224,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 14176
+    },
+    {
+      "epoch": 0.14177,
+      "grad_norm": 0.9565849380097393,
+      "learning_rate": 0.003,
+      "loss": 4.0492,
+      "step": 14177
+    },
+    {
+      "epoch": 0.14178,
+      "grad_norm": 1.3059769568691229,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 14178
+    },
+    {
+      "epoch": 0.14179,
+      "grad_norm": 1.0491674393087167,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 14179
+    },
+    {
+      "epoch": 0.1418,
+      "grad_norm": 1.1180949361288677,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 14180
+    },
+    {
+      "epoch": 0.14181,
+      "grad_norm": 1.1157322008472714,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 14181
+    },
+    {
+      "epoch": 0.14182,
+      "grad_norm": 1.3179222987926962,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 14182
+    },
+    {
+      "epoch": 0.14183,
+      "grad_norm": 1.2403172237457192,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 14183
+    },
+    {
+      "epoch": 0.14184,
+      "grad_norm": 1.336720325007302,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 14184
+    },
+    {
+      "epoch": 0.14185,
+      "grad_norm": 1.100527077534773,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 14185
+    },
+    {
+      "epoch": 0.14186,
+      "grad_norm": 1.2033854975669016,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 14186
+    },
+    {
+      "epoch": 0.14187,
+      "grad_norm": 1.2187189628320798,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 14187
+    },
+    {
+      "epoch": 0.14188,
+      "grad_norm": 1.0200176822347935,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 14188
+    },
+    {
+      "epoch": 0.14189,
+      "grad_norm": 1.5073251661283125,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 14189
+    },
+    {
+      "epoch": 0.1419,
+      "grad_norm": 1.0512805060783437,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 14190
+    },
+    {
+      "epoch": 0.14191,
+      "grad_norm": 1.167991755103892,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 14191
+    },
+    {
+      "epoch": 0.14192,
+      "grad_norm": 1.1772584721220622,
+      "learning_rate": 0.003,
+      "loss": 4.0616,
+      "step": 14192
+    },
+    {
+      "epoch": 0.14193,
+      "grad_norm": 1.1976833915329468,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 14193
+    },
+    {
+      "epoch": 0.14194,
+      "grad_norm": 1.190152113141832,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 14194
+    },
+    {
+      "epoch": 0.14195,
+      "grad_norm": 1.1151335344861781,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 14195
+    },
+    {
+      "epoch": 0.14196,
+      "grad_norm": 1.2354467070065152,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 14196
+    },
+    {
+      "epoch": 0.14197,
+      "grad_norm": 1.0970972906795948,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 14197
+    },
+    {
+      "epoch": 0.14198,
+      "grad_norm": 1.177775059299803,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 14198
+    },
+    {
+      "epoch": 0.14199,
+      "grad_norm": 1.144446470023133,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 14199
+    },
+    {
+      "epoch": 0.142,
+      "grad_norm": 1.3620451445274342,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 14200
+    },
+    {
+      "epoch": 0.14201,
+      "grad_norm": 1.1392223147475815,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 14201
+    },
+    {
+      "epoch": 0.14202,
+      "grad_norm": 1.3754184187622027,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 14202
+    },
+    {
+      "epoch": 0.14203,
+      "grad_norm": 1.048321822711271,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 14203
+    },
+    {
+      "epoch": 0.14204,
+      "grad_norm": 1.3892998384218211,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 14204
+    },
+    {
+      "epoch": 0.14205,
+      "grad_norm": 1.051400001412997,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 14205
+    },
+    {
+      "epoch": 0.14206,
+      "grad_norm": 1.325490620508042,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 14206
+    },
+    {
+      "epoch": 0.14207,
+      "grad_norm": 1.13493940310957,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 14207
+    },
+    {
+      "epoch": 0.14208,
+      "grad_norm": 1.367807284383825,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 14208
+    },
+    {
+      "epoch": 0.14209,
+      "grad_norm": 1.1294605234034383,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 14209
+    },
+    {
+      "epoch": 0.1421,
+      "grad_norm": 1.343495245675508,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 14210
+    },
+    {
+      "epoch": 0.14211,
+      "grad_norm": 1.0038877220611284,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 14211
+    },
+    {
+      "epoch": 0.14212,
+      "grad_norm": 1.534988054028867,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 14212
+    },
+    {
+      "epoch": 0.14213,
+      "grad_norm": 1.0070828660250137,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 14213
+    },
+    {
+      "epoch": 0.14214,
+      "grad_norm": 1.5597998232138048,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 14214
+    },
+    {
+      "epoch": 0.14215,
+      "grad_norm": 1.0651385458745226,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 14215
+    },
+    {
+      "epoch": 0.14216,
+      "grad_norm": 1.3348080877703172,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 14216
+    },
+    {
+      "epoch": 0.14217,
+      "grad_norm": 0.9499056849838106,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 14217
+    },
+    {
+      "epoch": 0.14218,
+      "grad_norm": 1.1418179918085627,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 14218
+    },
+    {
+      "epoch": 0.14219,
+      "grad_norm": 1.3743036275552343,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 14219
+    },
+    {
+      "epoch": 0.1422,
+      "grad_norm": 1.0214575397901615,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 14220
+    },
+    {
+      "epoch": 0.14221,
+      "grad_norm": 1.5581151608740837,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 14221
+    },
+    {
+      "epoch": 0.14222,
+      "grad_norm": 1.027127027705112,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 14222
+    },
+    {
+      "epoch": 0.14223,
+      "grad_norm": 1.4981227018638756,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 14223
+    },
+    {
+      "epoch": 0.14224,
+      "grad_norm": 1.008107220993946,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 14224
+    },
+    {
+      "epoch": 0.14225,
+      "grad_norm": 1.3732868322500928,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 14225
+    },
+    {
+      "epoch": 0.14226,
+      "grad_norm": 1.0437936957982215,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 14226
+    },
+    {
+      "epoch": 0.14227,
+      "grad_norm": 1.2731748595843275,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 14227
+    },
+    {
+      "epoch": 0.14228,
+      "grad_norm": 1.3649598712722133,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 14228
+    },
+    {
+      "epoch": 0.14229,
+      "grad_norm": 1.1752436368583978,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 14229
+    },
+    {
+      "epoch": 0.1423,
+      "grad_norm": 1.2731387940688244,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 14230
+    },
+    {
+      "epoch": 0.14231,
+      "grad_norm": 0.9888217773791853,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 14231
+    },
+    {
+      "epoch": 0.14232,
+      "grad_norm": 1.2419409956986254,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 14232
+    },
+    {
+      "epoch": 0.14233,
+      "grad_norm": 1.1208665504365474,
+      "learning_rate": 0.003,
+      "loss": 4.0515,
+      "step": 14233
+    },
+    {
+      "epoch": 0.14234,
+      "grad_norm": 1.3934997178845823,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 14234
+    },
+    {
+      "epoch": 0.14235,
+      "grad_norm": 0.9939585794166588,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 14235
+    },
+    {
+      "epoch": 0.14236,
+      "grad_norm": 1.466049674388318,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 14236
+    },
+    {
+      "epoch": 0.14237,
+      "grad_norm": 1.1516383833090853,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 14237
+    },
+    {
+      "epoch": 0.14238,
+      "grad_norm": 1.1905120240269704,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 14238
+    },
+    {
+      "epoch": 0.14239,
+      "grad_norm": 1.320357055484459,
+      "learning_rate": 0.003,
+      "loss": 4.0579,
+      "step": 14239
+    },
+    {
+      "epoch": 0.1424,
+      "grad_norm": 1.1739371270086971,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 14240
+    },
+    {
+      "epoch": 0.14241,
+      "grad_norm": 1.1953821818276693,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 14241
+    },
+    {
+      "epoch": 0.14242,
+      "grad_norm": 1.049876836420036,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 14242
+    },
+    {
+      "epoch": 0.14243,
+      "grad_norm": 1.0654099565056714,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 14243
+    },
+    {
+      "epoch": 0.14244,
+      "grad_norm": 1.3029073713304329,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 14244
+    },
+    {
+      "epoch": 0.14245,
+      "grad_norm": 1.256556933511795,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 14245
+    },
+    {
+      "epoch": 0.14246,
+      "grad_norm": 1.2772979759682255,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 14246
+    },
+    {
+      "epoch": 0.14247,
+      "grad_norm": 1.2552451182459947,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 14247
+    },
+    {
+      "epoch": 0.14248,
+      "grad_norm": 0.9939790102981942,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 14248
+    },
+    {
+      "epoch": 0.14249,
+      "grad_norm": 1.3178538598529268,
+      "learning_rate": 0.003,
+      "loss": 3.977,
+      "step": 14249
+    },
+    {
+      "epoch": 0.1425,
+      "grad_norm": 1.1601483906677303,
+      "learning_rate": 0.003,
+      "loss": 3.9886,
+      "step": 14250
+    },
+    {
+      "epoch": 0.14251,
+      "grad_norm": 1.221185680742918,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 14251
+    },
+    {
+      "epoch": 0.14252,
+      "grad_norm": 1.2474809124796564,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 14252
+    },
+    {
+      "epoch": 0.14253,
+      "grad_norm": 1.1992344745787196,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 14253
+    },
+    {
+      "epoch": 0.14254,
+      "grad_norm": 0.9132225327909732,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 14254
+    },
+    {
+      "epoch": 0.14255,
+      "grad_norm": 1.1811406392830759,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 14255
+    },
+    {
+      "epoch": 0.14256,
+      "grad_norm": 1.3453499901407837,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 14256
+    },
+    {
+      "epoch": 0.14257,
+      "grad_norm": 1.1500286543886105,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 14257
+    },
+    {
+      "epoch": 0.14258,
+      "grad_norm": 1.4664633371094062,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 14258
+    },
+    {
+      "epoch": 0.14259,
+      "grad_norm": 1.119114866847188,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 14259
+    },
+    {
+      "epoch": 0.1426,
+      "grad_norm": 1.3478008305817595,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 14260
+    },
+    {
+      "epoch": 0.14261,
+      "grad_norm": 0.9832585037994765,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 14261
+    },
+    {
+      "epoch": 0.14262,
+      "grad_norm": 1.1548328309543094,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 14262
+    },
+    {
+      "epoch": 0.14263,
+      "grad_norm": 1.1540288232997642,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 14263
+    },
+    {
+      "epoch": 0.14264,
+      "grad_norm": 1.186729047593634,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 14264
+    },
+    {
+      "epoch": 0.14265,
+      "grad_norm": 1.1810656564347743,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 14265
+    },
+    {
+      "epoch": 0.14266,
+      "grad_norm": 1.4770408871743086,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 14266
+    },
+    {
+      "epoch": 0.14267,
+      "grad_norm": 0.9938931214161333,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 14267
+    },
+    {
+      "epoch": 0.14268,
+      "grad_norm": 1.3149471709837792,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 14268
+    },
+    {
+      "epoch": 0.14269,
+      "grad_norm": 1.2328045329778488,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 14269
+    },
+    {
+      "epoch": 0.1427,
+      "grad_norm": 1.1250321693454324,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 14270
+    },
+    {
+      "epoch": 0.14271,
+      "grad_norm": 0.9889732485401035,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 14271
+    },
+    {
+      "epoch": 0.14272,
+      "grad_norm": 1.1476898981247932,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 14272
+    },
+    {
+      "epoch": 0.14273,
+      "grad_norm": 1.1025789465267635,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 14273
+    },
+    {
+      "epoch": 0.14274,
+      "grad_norm": 1.122764833640579,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 14274
+    },
+    {
+      "epoch": 0.14275,
+      "grad_norm": 1.147038214357953,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 14275
+    },
+    {
+      "epoch": 0.14276,
+      "grad_norm": 1.2162589652144875,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 14276
+    },
+    {
+      "epoch": 0.14277,
+      "grad_norm": 1.0566978742450486,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 14277
+    },
+    {
+      "epoch": 0.14278,
+      "grad_norm": 1.7169108876688428,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 14278
+    },
+    {
+      "epoch": 0.14279,
+      "grad_norm": 0.876042071141214,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 14279
+    },
+    {
+      "epoch": 0.1428,
+      "grad_norm": 1.1084611799228745,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 14280
+    },
+    {
+      "epoch": 0.14281,
+      "grad_norm": 1.2709943034324318,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 14281
+    },
+    {
+      "epoch": 0.14282,
+      "grad_norm": 1.3812282195884515,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 14282
+    },
+    {
+      "epoch": 0.14283,
+      "grad_norm": 1.1566429269433423,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 14283
+    },
+    {
+      "epoch": 0.14284,
+      "grad_norm": 1.2835084133322305,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 14284
+    },
+    {
+      "epoch": 0.14285,
+      "grad_norm": 1.055340094724688,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 14285
+    },
+    {
+      "epoch": 0.14286,
+      "grad_norm": 1.4105259998316546,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 14286
+    },
+    {
+      "epoch": 0.14287,
+      "grad_norm": 1.2550811667078823,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 14287
+    },
+    {
+      "epoch": 0.14288,
+      "grad_norm": 1.1132410163567108,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 14288
+    },
+    {
+      "epoch": 0.14289,
+      "grad_norm": 1.501973582017856,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 14289
+    },
+    {
+      "epoch": 0.1429,
+      "grad_norm": 0.9719422279166505,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 14290
+    },
+    {
+      "epoch": 0.14291,
+      "grad_norm": 1.3825012805536943,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 14291
+    },
+    {
+      "epoch": 0.14292,
+      "grad_norm": 1.1099027929597218,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 14292
+    },
+    {
+      "epoch": 0.14293,
+      "grad_norm": 1.3461577718551276,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 14293
+    },
+    {
+      "epoch": 0.14294,
+      "grad_norm": 1.24973576329806,
+      "learning_rate": 0.003,
+      "loss": 4.0488,
+      "step": 14294
+    },
+    {
+      "epoch": 0.14295,
+      "grad_norm": 1.4312573908271133,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 14295
+    },
+    {
+      "epoch": 0.14296,
+      "grad_norm": 0.9116815765076999,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 14296
+    },
+    {
+      "epoch": 0.14297,
+      "grad_norm": 1.1882497269250194,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 14297
+    },
+    {
+      "epoch": 0.14298,
+      "grad_norm": 1.243886949272355,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 14298
+    },
+    {
+      "epoch": 0.14299,
+      "grad_norm": 1.3152173645237428,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 14299
+    },
+    {
+      "epoch": 0.143,
+      "grad_norm": 1.0919137431492918,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 14300
+    },
+    {
+      "epoch": 0.14301,
+      "grad_norm": 1.3685064657521364,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 14301
+    },
+    {
+      "epoch": 0.14302,
+      "grad_norm": 1.0110048864429841,
+      "learning_rate": 0.003,
+      "loss": 3.9804,
+      "step": 14302
+    },
+    {
+      "epoch": 0.14303,
+      "grad_norm": 1.4776803140572186,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 14303
+    },
+    {
+      "epoch": 0.14304,
+      "grad_norm": 1.0986579942741568,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 14304
+    },
+    {
+      "epoch": 0.14305,
+      "grad_norm": 1.1158216374588756,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 14305
+    },
+    {
+      "epoch": 0.14306,
+      "grad_norm": 1.2946086944401998,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 14306
+    },
+    {
+      "epoch": 0.14307,
+      "grad_norm": 1.1641566971726678,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 14307
+    },
+    {
+      "epoch": 0.14308,
+      "grad_norm": 1.208708792057949,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 14308
+    },
+    {
+      "epoch": 0.14309,
+      "grad_norm": 1.2861984661320631,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 14309
+    },
+    {
+      "epoch": 0.1431,
+      "grad_norm": 1.1096737366335085,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 14310
+    },
+    {
+      "epoch": 0.14311,
+      "grad_norm": 1.332880293436986,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 14311
+    },
+    {
+      "epoch": 0.14312,
+      "grad_norm": 1.1899327349214601,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 14312
+    },
+    {
+      "epoch": 0.14313,
+      "grad_norm": 1.585214015131889,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 14313
+    },
+    {
+      "epoch": 0.14314,
+      "grad_norm": 0.7871645445442891,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 14314
+    },
+    {
+      "epoch": 0.14315,
+      "grad_norm": 1.0506442944194525,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 14315
+    },
+    {
+      "epoch": 0.14316,
+      "grad_norm": 1.524745255403895,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 14316
+    },
+    {
+      "epoch": 0.14317,
+      "grad_norm": 1.074288517670019,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 14317
+    },
+    {
+      "epoch": 0.14318,
+      "grad_norm": 1.5176426081616132,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 14318
+    },
+    {
+      "epoch": 0.14319,
+      "grad_norm": 1.004949697833761,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 14319
+    },
+    {
+      "epoch": 0.1432,
+      "grad_norm": 1.3267022711433671,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 14320
+    },
+    {
+      "epoch": 0.14321,
+      "grad_norm": 1.0043666421766428,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 14321
+    },
+    {
+      "epoch": 0.14322,
+      "grad_norm": 1.23646610606199,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 14322
+    },
+    {
+      "epoch": 0.14323,
+      "grad_norm": 1.2695267039105103,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 14323
+    },
+    {
+      "epoch": 0.14324,
+      "grad_norm": 1.091829883598905,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 14324
+    },
+    {
+      "epoch": 0.14325,
+      "grad_norm": 1.3614936043265236,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 14325
+    },
+    {
+      "epoch": 0.14326,
+      "grad_norm": 1.1215395898553848,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 14326
+    },
+    {
+      "epoch": 0.14327,
+      "grad_norm": 1.2154310116743614,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 14327
+    },
+    {
+      "epoch": 0.14328,
+      "grad_norm": 1.1889107742210372,
+      "learning_rate": 0.003,
+      "loss": 4.0591,
+      "step": 14328
+    },
+    {
+      "epoch": 0.14329,
+      "grad_norm": 1.1507054007861035,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 14329
+    },
+    {
+      "epoch": 0.1433,
+      "grad_norm": 1.330111633581526,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 14330
+    },
+    {
+      "epoch": 0.14331,
+      "grad_norm": 1.0768442491212724,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 14331
+    },
+    {
+      "epoch": 0.14332,
+      "grad_norm": 1.239911147068525,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 14332
+    },
+    {
+      "epoch": 0.14333,
+      "grad_norm": 1.323571559451598,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 14333
+    },
+    {
+      "epoch": 0.14334,
+      "grad_norm": 1.1376701926449524,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 14334
+    },
+    {
+      "epoch": 0.14335,
+      "grad_norm": 1.3340901235718514,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 14335
+    },
+    {
+      "epoch": 0.14336,
+      "grad_norm": 1.2686228303701885,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 14336
+    },
+    {
+      "epoch": 0.14337,
+      "grad_norm": 1.0467394749911172,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 14337
+    },
+    {
+      "epoch": 0.14338,
+      "grad_norm": 1.216880189998311,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 14338
+    },
+    {
+      "epoch": 0.14339,
+      "grad_norm": 1.1505835170743297,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 14339
+    },
+    {
+      "epoch": 0.1434,
+      "grad_norm": 1.1715423738629298,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 14340
+    },
+    {
+      "epoch": 0.14341,
+      "grad_norm": 1.3591295789133988,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 14341
+    },
+    {
+      "epoch": 0.14342,
+      "grad_norm": 1.0802743236414356,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 14342
+    },
+    {
+      "epoch": 0.14343,
+      "grad_norm": 1.2314655572568123,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 14343
+    },
+    {
+      "epoch": 0.14344,
+      "grad_norm": 1.2170050499547125,
+      "learning_rate": 0.003,
+      "loss": 4.0571,
+      "step": 14344
+    },
+    {
+      "epoch": 0.14345,
+      "grad_norm": 1.1764760173694266,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 14345
+    },
+    {
+      "epoch": 0.14346,
+      "grad_norm": 1.4899350031793006,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 14346
+    },
+    {
+      "epoch": 0.14347,
+      "grad_norm": 1.0758576003187106,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 14347
+    },
+    {
+      "epoch": 0.14348,
+      "grad_norm": 1.3635564326619183,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 14348
+    },
+    {
+      "epoch": 0.14349,
+      "grad_norm": 1.2235786275907428,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 14349
+    },
+    {
+      "epoch": 0.1435,
+      "grad_norm": 1.2436695980903918,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 14350
+    },
+    {
+      "epoch": 0.14351,
+      "grad_norm": 1.210672505276866,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 14351
+    },
+    {
+      "epoch": 0.14352,
+      "grad_norm": 1.1545045673205454,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 14352
+    },
+    {
+      "epoch": 0.14353,
+      "grad_norm": 1.3725191826934457,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 14353
+    },
+    {
+      "epoch": 0.14354,
+      "grad_norm": 1.1897111683673434,
+      "learning_rate": 0.003,
+      "loss": 3.9837,
+      "step": 14354
+    },
+    {
+      "epoch": 0.14355,
+      "grad_norm": 1.5096476297328265,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 14355
+    },
+    {
+      "epoch": 0.14356,
+      "grad_norm": 0.8717003987565817,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 14356
+    },
+    {
+      "epoch": 0.14357,
+      "grad_norm": 1.0792518544905656,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 14357
+    },
+    {
+      "epoch": 0.14358,
+      "grad_norm": 1.351721600532738,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 14358
+    },
+    {
+      "epoch": 0.14359,
+      "grad_norm": 0.996693627500812,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 14359
+    },
+    {
+      "epoch": 0.1436,
+      "grad_norm": 1.2646311347446006,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 14360
+    },
+    {
+      "epoch": 0.14361,
+      "grad_norm": 1.0228518544107583,
+      "learning_rate": 0.003,
+      "loss": 3.9822,
+      "step": 14361
+    },
+    {
+      "epoch": 0.14362,
+      "grad_norm": 1.256227677652288,
+      "learning_rate": 0.003,
+      "loss": 4.0567,
+      "step": 14362
+    },
+    {
+      "epoch": 0.14363,
+      "grad_norm": 1.166195059307722,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 14363
+    },
+    {
+      "epoch": 0.14364,
+      "grad_norm": 1.172896560370709,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 14364
+    },
+    {
+      "epoch": 0.14365,
+      "grad_norm": 1.2194603487156364,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 14365
+    },
+    {
+      "epoch": 0.14366,
+      "grad_norm": 1.1945393859702242,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 14366
+    },
+    {
+      "epoch": 0.14367,
+      "grad_norm": 1.1653822712601563,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 14367
+    },
+    {
+      "epoch": 0.14368,
+      "grad_norm": 1.2726517892617004,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 14368
+    },
+    {
+      "epoch": 0.14369,
+      "grad_norm": 1.244704823359052,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 14369
+    },
+    {
+      "epoch": 0.1437,
+      "grad_norm": 1.2172796802879724,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 14370
+    },
+    {
+      "epoch": 0.14371,
+      "grad_norm": 1.2603191200469879,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 14371
+    },
+    {
+      "epoch": 0.14372,
+      "grad_norm": 1.0718879428269645,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 14372
+    },
+    {
+      "epoch": 0.14373,
+      "grad_norm": 1.3330073165146854,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 14373
+    },
+    {
+      "epoch": 0.14374,
+      "grad_norm": 1.2012912275507692,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 14374
+    },
+    {
+      "epoch": 0.14375,
+      "grad_norm": 1.1908696738600753,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 14375
+    },
+    {
+      "epoch": 0.14376,
+      "grad_norm": 1.3507919992492639,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 14376
+    },
+    {
+      "epoch": 0.14377,
+      "grad_norm": 1.1733220329176508,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 14377
+    },
+    {
+      "epoch": 0.14378,
+      "grad_norm": 1.363297551963369,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 14378
+    },
+    {
+      "epoch": 0.14379,
+      "grad_norm": 1.0313070601521575,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 14379
+    },
+    {
+      "epoch": 0.1438,
+      "grad_norm": 1.3397131378092488,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 14380
+    },
+    {
+      "epoch": 0.14381,
+      "grad_norm": 1.0121858969636834,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 14381
+    },
+    {
+      "epoch": 0.14382,
+      "grad_norm": 1.390088716518865,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 14382
+    },
+    {
+      "epoch": 0.14383,
+      "grad_norm": 1.1129271392471018,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 14383
+    },
+    {
+      "epoch": 0.14384,
+      "grad_norm": 1.2375419522985815,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 14384
+    },
+    {
+      "epoch": 0.14385,
+      "grad_norm": 1.3673457599885632,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 14385
+    },
+    {
+      "epoch": 0.14386,
+      "grad_norm": 0.9916084434443762,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 14386
+    },
+    {
+      "epoch": 0.14387,
+      "grad_norm": 1.4002355526911558,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 14387
+    },
+    {
+      "epoch": 0.14388,
+      "grad_norm": 1.1109558133777058,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 14388
+    },
+    {
+      "epoch": 0.14389,
+      "grad_norm": 1.3626692795113289,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 14389
+    },
+    {
+      "epoch": 0.1439,
+      "grad_norm": 1.2460630032368645,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 14390
+    },
+    {
+      "epoch": 0.14391,
+      "grad_norm": 1.2243928195308638,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 14391
+    },
+    {
+      "epoch": 0.14392,
+      "grad_norm": 1.1784102432358567,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 14392
+    },
+    {
+      "epoch": 0.14393,
+      "grad_norm": 1.4939716515347823,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 14393
+    },
+    {
+      "epoch": 0.14394,
+      "grad_norm": 1.1498274365748753,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 14394
+    },
+    {
+      "epoch": 0.14395,
+      "grad_norm": 1.200810908936847,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 14395
+    },
+    {
+      "epoch": 0.14396,
+      "grad_norm": 1.0745855059534282,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 14396
+    },
+    {
+      "epoch": 0.14397,
+      "grad_norm": 1.277116387315723,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 14397
+    },
+    {
+      "epoch": 0.14398,
+      "grad_norm": 0.9419150034212755,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 14398
+    },
+    {
+      "epoch": 0.14399,
+      "grad_norm": 1.3295125544631583,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 14399
+    },
+    {
+      "epoch": 0.144,
+      "grad_norm": 1.1989912070727897,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 14400
+    },
+    {
+      "epoch": 0.14401,
+      "grad_norm": 1.3189184717411828,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 14401
+    },
+    {
+      "epoch": 0.14402,
+      "grad_norm": 1.0816437140157895,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 14402
+    },
+    {
+      "epoch": 0.14403,
+      "grad_norm": 1.4360315372365975,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 14403
+    },
+    {
+      "epoch": 0.14404,
+      "grad_norm": 1.2193549764527278,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 14404
+    },
+    {
+      "epoch": 0.14405,
+      "grad_norm": 1.0823921330138107,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 14405
+    },
+    {
+      "epoch": 0.14406,
+      "grad_norm": 1.1963637207195172,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 14406
+    },
+    {
+      "epoch": 0.14407,
+      "grad_norm": 1.297169350319569,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 14407
+    },
+    {
+      "epoch": 0.14408,
+      "grad_norm": 1.275499530630152,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 14408
+    },
+    {
+      "epoch": 0.14409,
+      "grad_norm": 1.228439110366136,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 14409
+    },
+    {
+      "epoch": 0.1441,
+      "grad_norm": 1.5688924175322991,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 14410
+    },
+    {
+      "epoch": 0.14411,
+      "grad_norm": 1.0120584218587096,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 14411
+    },
+    {
+      "epoch": 0.14412,
+      "grad_norm": 1.3011317791543677,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 14412
+    },
+    {
+      "epoch": 0.14413,
+      "grad_norm": 1.0668175565997582,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 14413
+    },
+    {
+      "epoch": 0.14414,
+      "grad_norm": 1.5931538460060055,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 14414
+    },
+    {
+      "epoch": 0.14415,
+      "grad_norm": 1.078989729152812,
+      "learning_rate": 0.003,
+      "loss": 3.9768,
+      "step": 14415
+    },
+    {
+      "epoch": 0.14416,
+      "grad_norm": 1.1701151551911053,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 14416
+    },
+    {
+      "epoch": 0.14417,
+      "grad_norm": 1.0128987161588474,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 14417
+    },
+    {
+      "epoch": 0.14418,
+      "grad_norm": 1.3226158471972884,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 14418
+    },
+    {
+      "epoch": 0.14419,
+      "grad_norm": 1.0391012761182818,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 14419
+    },
+    {
+      "epoch": 0.1442,
+      "grad_norm": 1.2995035977172058,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 14420
+    },
+    {
+      "epoch": 0.14421,
+      "grad_norm": 1.1675231453686032,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 14421
+    },
+    {
+      "epoch": 0.14422,
+      "grad_norm": 1.1990662827938074,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 14422
+    },
+    {
+      "epoch": 0.14423,
+      "grad_norm": 1.3341843292074833,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 14423
+    },
+    {
+      "epoch": 0.14424,
+      "grad_norm": 1.255184302280917,
+      "learning_rate": 0.003,
+      "loss": 3.9859,
+      "step": 14424
+    },
+    {
+      "epoch": 0.14425,
+      "grad_norm": 1.213102381507609,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 14425
+    },
+    {
+      "epoch": 0.14426,
+      "grad_norm": 1.460646676879946,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 14426
+    },
+    {
+      "epoch": 0.14427,
+      "grad_norm": 1.0654397567379355,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 14427
+    },
+    {
+      "epoch": 0.14428,
+      "grad_norm": 1.4763855575417097,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 14428
+    },
+    {
+      "epoch": 0.14429,
+      "grad_norm": 1.236018760227688,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 14429
+    },
+    {
+      "epoch": 0.1443,
+      "grad_norm": 1.1701305528800572,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 14430
+    },
+    {
+      "epoch": 0.14431,
+      "grad_norm": 1.0277455869468615,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 14431
+    },
+    {
+      "epoch": 0.14432,
+      "grad_norm": 1.264236895061662,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 14432
+    },
+    {
+      "epoch": 0.14433,
+      "grad_norm": 1.1450179472542663,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 14433
+    },
+    {
+      "epoch": 0.14434,
+      "grad_norm": 1.3260135156923754,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 14434
+    },
+    {
+      "epoch": 0.14435,
+      "grad_norm": 1.0950989832118134,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 14435
+    },
+    {
+      "epoch": 0.14436,
+      "grad_norm": 1.3891352119599327,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 14436
+    },
+    {
+      "epoch": 0.14437,
+      "grad_norm": 1.051389794925024,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 14437
+    },
+    {
+      "epoch": 0.14438,
+      "grad_norm": 1.4529679895972711,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 14438
+    },
+    {
+      "epoch": 0.14439,
+      "grad_norm": 0.9878120297750842,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 14439
+    },
+    {
+      "epoch": 0.1444,
+      "grad_norm": 1.3500863354125725,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 14440
+    },
+    {
+      "epoch": 0.14441,
+      "grad_norm": 1.0391767489442227,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 14441
+    },
+    {
+      "epoch": 0.14442,
+      "grad_norm": 1.3341558927499608,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 14442
+    },
+    {
+      "epoch": 0.14443,
+      "grad_norm": 1.2367517579258847,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 14443
+    },
+    {
+      "epoch": 0.14444,
+      "grad_norm": 0.999685303003077,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 14444
+    },
+    {
+      "epoch": 0.14445,
+      "grad_norm": 1.637842531994357,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 14445
+    },
+    {
+      "epoch": 0.14446,
+      "grad_norm": 1.1935103420326467,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 14446
+    },
+    {
+      "epoch": 0.14447,
+      "grad_norm": 1.3946879258079155,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 14447
+    },
+    {
+      "epoch": 0.14448,
+      "grad_norm": 1.001298245674135,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 14448
+    },
+    {
+      "epoch": 0.14449,
+      "grad_norm": 1.273338157626047,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 14449
+    },
+    {
+      "epoch": 0.1445,
+      "grad_norm": 1.0845018678469514,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 14450
+    },
+    {
+      "epoch": 0.14451,
+      "grad_norm": 1.5101736035199864,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 14451
+    },
+    {
+      "epoch": 0.14452,
+      "grad_norm": 1.0905745218691874,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 14452
+    },
+    {
+      "epoch": 0.14453,
+      "grad_norm": 1.3842347273258648,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 14453
+    },
+    {
+      "epoch": 0.14454,
+      "grad_norm": 1.2922298534952803,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 14454
+    },
+    {
+      "epoch": 0.14455,
+      "grad_norm": 1.0925683960156223,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 14455
+    },
+    {
+      "epoch": 0.14456,
+      "grad_norm": 1.3559823940041604,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 14456
+    },
+    {
+      "epoch": 0.14457,
+      "grad_norm": 1.2658248605229905,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 14457
+    },
+    {
+      "epoch": 0.14458,
+      "grad_norm": 1.0732730407104027,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 14458
+    },
+    {
+      "epoch": 0.14459,
+      "grad_norm": 1.160826402475991,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 14459
+    },
+    {
+      "epoch": 0.1446,
+      "grad_norm": 1.2237950140205267,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 14460
+    },
+    {
+      "epoch": 0.14461,
+      "grad_norm": 1.1535901728274076,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 14461
+    },
+    {
+      "epoch": 0.14462,
+      "grad_norm": 1.3750677627530832,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 14462
+    },
+    {
+      "epoch": 0.14463,
+      "grad_norm": 1.1796103564127114,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 14463
+    },
+    {
+      "epoch": 0.14464,
+      "grad_norm": 1.422757421374509,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 14464
+    },
+    {
+      "epoch": 0.14465,
+      "grad_norm": 1.1045263307117212,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 14465
+    },
+    {
+      "epoch": 0.14466,
+      "grad_norm": 1.180752551862371,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 14466
+    },
+    {
+      "epoch": 0.14467,
+      "grad_norm": 1.2050557387864302,
+      "learning_rate": 0.003,
+      "loss": 4.0564,
+      "step": 14467
+    },
+    {
+      "epoch": 0.14468,
+      "grad_norm": 1.0681333646184352,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 14468
+    },
+    {
+      "epoch": 0.14469,
+      "grad_norm": 1.3246381531360234,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 14469
+    },
+    {
+      "epoch": 0.1447,
+      "grad_norm": 1.1523402047220455,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 14470
+    },
+    {
+      "epoch": 0.14471,
+      "grad_norm": 1.2207650779108505,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 14471
+    },
+    {
+      "epoch": 0.14472,
+      "grad_norm": 1.1728700266594385,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 14472
+    },
+    {
+      "epoch": 0.14473,
+      "grad_norm": 1.250042755849908,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 14473
+    },
+    {
+      "epoch": 0.14474,
+      "grad_norm": 1.3570314137605364,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 14474
+    },
+    {
+      "epoch": 0.14475,
+      "grad_norm": 1.0777052849486088,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 14475
+    },
+    {
+      "epoch": 0.14476,
+      "grad_norm": 1.2889017126743465,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 14476
+    },
+    {
+      "epoch": 0.14477,
+      "grad_norm": 1.0346286013190817,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 14477
+    },
+    {
+      "epoch": 0.14478,
+      "grad_norm": 1.1620056393435554,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 14478
+    },
+    {
+      "epoch": 0.14479,
+      "grad_norm": 1.484373566585143,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 14479
+    },
+    {
+      "epoch": 0.1448,
+      "grad_norm": 1.1443086993542309,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 14480
+    },
+    {
+      "epoch": 0.14481,
+      "grad_norm": 1.2698849354218817,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 14481
+    },
+    {
+      "epoch": 0.14482,
+      "grad_norm": 1.0087013208617437,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 14482
+    },
+    {
+      "epoch": 0.14483,
+      "grad_norm": 1.4960437345417261,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 14483
+    },
+    {
+      "epoch": 0.14484,
+      "grad_norm": 1.0081903193331918,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 14484
+    },
+    {
+      "epoch": 0.14485,
+      "grad_norm": 1.3618954479781298,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 14485
+    },
+    {
+      "epoch": 0.14486,
+      "grad_norm": 1.174742735761288,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 14486
+    },
+    {
+      "epoch": 0.14487,
+      "grad_norm": 1.3257975675308202,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 14487
+    },
+    {
+      "epoch": 0.14488,
+      "grad_norm": 1.290576379160918,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 14488
+    },
+    {
+      "epoch": 0.14489,
+      "grad_norm": 1.1448041897333847,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 14489
+    },
+    {
+      "epoch": 0.1449,
+      "grad_norm": 1.318156067960972,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 14490
+    },
+    {
+      "epoch": 0.14491,
+      "grad_norm": 1.0646783634293895,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 14491
+    },
+    {
+      "epoch": 0.14492,
+      "grad_norm": 1.32623979475304,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 14492
+    },
+    {
+      "epoch": 0.14493,
+      "grad_norm": 1.1016488012244479,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 14493
+    },
+    {
+      "epoch": 0.14494,
+      "grad_norm": 1.3909336423726262,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 14494
+    },
+    {
+      "epoch": 0.14495,
+      "grad_norm": 1.1601648377855989,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 14495
+    },
+    {
+      "epoch": 0.14496,
+      "grad_norm": 1.009980094021526,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 14496
+    },
+    {
+      "epoch": 0.14497,
+      "grad_norm": 1.1757064908364916,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 14497
+    },
+    {
+      "epoch": 0.14498,
+      "grad_norm": 1.2557706682498537,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 14498
+    },
+    {
+      "epoch": 0.14499,
+      "grad_norm": 1.0063481278247886,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 14499
+    },
+    {
+      "epoch": 0.145,
+      "grad_norm": 1.2600965724146316,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 14500
+    },
+    {
+      "epoch": 0.14501,
+      "grad_norm": 1.058860986503806,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 14501
+    },
+    {
+      "epoch": 0.14502,
+      "grad_norm": 1.2407155637824785,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 14502
+    },
+    {
+      "epoch": 0.14503,
+      "grad_norm": 1.309155248761219,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 14503
+    },
+    {
+      "epoch": 0.14504,
+      "grad_norm": 1.4214121220645077,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 14504
+    },
+    {
+      "epoch": 0.14505,
+      "grad_norm": 1.0404374530095637,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 14505
+    },
+    {
+      "epoch": 0.14506,
+      "grad_norm": 1.2873683959126516,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 14506
+    },
+    {
+      "epoch": 0.14507,
+      "grad_norm": 1.2001126154689117,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 14507
+    },
+    {
+      "epoch": 0.14508,
+      "grad_norm": 1.1862564918121277,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 14508
+    },
+    {
+      "epoch": 0.14509,
+      "grad_norm": 1.1429814837311603,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 14509
+    },
+    {
+      "epoch": 0.1451,
+      "grad_norm": 1.0366689188633833,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 14510
+    },
+    {
+      "epoch": 0.14511,
+      "grad_norm": 1.2689118182788326,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 14511
+    },
+    {
+      "epoch": 0.14512,
+      "grad_norm": 1.221645510450628,
+      "learning_rate": 0.003,
+      "loss": 4.0569,
+      "step": 14512
+    },
+    {
+      "epoch": 0.14513,
+      "grad_norm": 1.0676268731187524,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 14513
+    },
+    {
+      "epoch": 0.14514,
+      "grad_norm": 1.3803401448631745,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 14514
+    },
+    {
+      "epoch": 0.14515,
+      "grad_norm": 1.0381879543839039,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 14515
+    },
+    {
+      "epoch": 0.14516,
+      "grad_norm": 1.3728094284848495,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 14516
+    },
+    {
+      "epoch": 0.14517,
+      "grad_norm": 1.2038839059792454,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 14517
+    },
+    {
+      "epoch": 0.14518,
+      "grad_norm": 1.5016318963257844,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 14518
+    },
+    {
+      "epoch": 0.14519,
+      "grad_norm": 1.1748809794876305,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 14519
+    },
+    {
+      "epoch": 0.1452,
+      "grad_norm": 1.4488835508199935,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 14520
+    },
+    {
+      "epoch": 0.14521,
+      "grad_norm": 0.9583289734041514,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 14521
+    },
+    {
+      "epoch": 0.14522,
+      "grad_norm": 1.2530762804116282,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 14522
+    },
+    {
+      "epoch": 0.14523,
+      "grad_norm": 1.3470868291082656,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 14523
+    },
+    {
+      "epoch": 0.14524,
+      "grad_norm": 1.516748058417483,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 14524
+    },
+    {
+      "epoch": 0.14525,
+      "grad_norm": 0.8688628064559125,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 14525
+    },
+    {
+      "epoch": 0.14526,
+      "grad_norm": 1.2290374421396448,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 14526
+    },
+    {
+      "epoch": 0.14527,
+      "grad_norm": 1.4106250986583697,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 14527
+    },
+    {
+      "epoch": 0.14528,
+      "grad_norm": 1.1960020034701262,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 14528
+    },
+    {
+      "epoch": 0.14529,
+      "grad_norm": 1.2461522806726895,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 14529
+    },
+    {
+      "epoch": 0.1453,
+      "grad_norm": 1.343435581151887,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 14530
+    },
+    {
+      "epoch": 0.14531,
+      "grad_norm": 1.1897863146723584,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 14531
+    },
+    {
+      "epoch": 0.14532,
+      "grad_norm": 1.2406366984067851,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 14532
+    },
+    {
+      "epoch": 0.14533,
+      "grad_norm": 1.2770937031087264,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 14533
+    },
+    {
+      "epoch": 0.14534,
+      "grad_norm": 1.1333862009610007,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 14534
+    },
+    {
+      "epoch": 0.14535,
+      "grad_norm": 1.1215687848618767,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 14535
+    },
+    {
+      "epoch": 0.14536,
+      "grad_norm": 1.2456154158145807,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 14536
+    },
+    {
+      "epoch": 0.14537,
+      "grad_norm": 0.9894400543840622,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 14537
+    },
+    {
+      "epoch": 0.14538,
+      "grad_norm": 1.4556690665591743,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 14538
+    },
+    {
+      "epoch": 0.14539,
+      "grad_norm": 1.1367777065612945,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 14539
+    },
+    {
+      "epoch": 0.1454,
+      "grad_norm": 1.301526864027351,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 14540
+    },
+    {
+      "epoch": 0.14541,
+      "grad_norm": 1.2157751688778053,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 14541
+    },
+    {
+      "epoch": 0.14542,
+      "grad_norm": 0.9290021911687516,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 14542
+    },
+    {
+      "epoch": 0.14543,
+      "grad_norm": 1.4507913936570669,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 14543
+    },
+    {
+      "epoch": 0.14544,
+      "grad_norm": 1.2253577184771525,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 14544
+    },
+    {
+      "epoch": 0.14545,
+      "grad_norm": 1.1595512555771437,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 14545
+    },
+    {
+      "epoch": 0.14546,
+      "grad_norm": 1.4697108397166665,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 14546
+    },
+    {
+      "epoch": 0.14547,
+      "grad_norm": 1.204273495324109,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 14547
+    },
+    {
+      "epoch": 0.14548,
+      "grad_norm": 1.2210909540672343,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 14548
+    },
+    {
+      "epoch": 0.14549,
+      "grad_norm": 1.1960358153078625,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 14549
+    },
+    {
+      "epoch": 0.1455,
+      "grad_norm": 1.168400476383576,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 14550
+    },
+    {
+      "epoch": 0.14551,
+      "grad_norm": 1.3522931121842838,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 14551
+    },
+    {
+      "epoch": 0.14552,
+      "grad_norm": 0.9815479797794744,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 14552
+    },
+    {
+      "epoch": 0.14553,
+      "grad_norm": 1.2883507256957447,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 14553
+    },
+    {
+      "epoch": 0.14554,
+      "grad_norm": 1.0742019647017573,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 14554
+    },
+    {
+      "epoch": 0.14555,
+      "grad_norm": 1.1017979923376091,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 14555
+    },
+    {
+      "epoch": 0.14556,
+      "grad_norm": 1.0805201358805017,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 14556
+    },
+    {
+      "epoch": 0.14557,
+      "grad_norm": 1.4082542123251285,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 14557
+    },
+    {
+      "epoch": 0.14558,
+      "grad_norm": 1.2638267673234083,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 14558
+    },
+    {
+      "epoch": 0.14559,
+      "grad_norm": 1.2636206096964009,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 14559
+    },
+    {
+      "epoch": 0.1456,
+      "grad_norm": 1.2256733740875092,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 14560
+    },
+    {
+      "epoch": 0.14561,
+      "grad_norm": 1.4019023401389583,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 14561
+    },
+    {
+      "epoch": 0.14562,
+      "grad_norm": 0.987825905196306,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 14562
+    },
+    {
+      "epoch": 0.14563,
+      "grad_norm": 1.3195123345596431,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 14563
+    },
+    {
+      "epoch": 0.14564,
+      "grad_norm": 1.1794370029884693,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 14564
+    },
+    {
+      "epoch": 0.14565,
+      "grad_norm": 1.1246915934472845,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 14565
+    },
+    {
+      "epoch": 0.14566,
+      "grad_norm": 1.1994594345382452,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 14566
+    },
+    {
+      "epoch": 0.14567,
+      "grad_norm": 1.2891949561316804,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 14567
+    },
+    {
+      "epoch": 0.14568,
+      "grad_norm": 1.234574364873565,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 14568
+    },
+    {
+      "epoch": 0.14569,
+      "grad_norm": 1.1069870175505885,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 14569
+    },
+    {
+      "epoch": 0.1457,
+      "grad_norm": 1.5472796516265446,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 14570
+    },
+    {
+      "epoch": 0.14571,
+      "grad_norm": 1.0606829134008178,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 14571
+    },
+    {
+      "epoch": 0.14572,
+      "grad_norm": 1.3441520210728717,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 14572
+    },
+    {
+      "epoch": 0.14573,
+      "grad_norm": 1.2160307278040676,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 14573
+    },
+    {
+      "epoch": 0.14574,
+      "grad_norm": 1.3261360269716858,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 14574
+    },
+    {
+      "epoch": 0.14575,
+      "grad_norm": 1.0168868414381258,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 14575
+    },
+    {
+      "epoch": 0.14576,
+      "grad_norm": 1.398200028892131,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 14576
+    },
+    {
+      "epoch": 0.14577,
+      "grad_norm": 0.975521183755697,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 14577
+    },
+    {
+      "epoch": 0.14578,
+      "grad_norm": 1.347932974764977,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 14578
+    },
+    {
+      "epoch": 0.14579,
+      "grad_norm": 1.320107335942108,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 14579
+    },
+    {
+      "epoch": 0.1458,
+      "grad_norm": 1.1804506897454756,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 14580
+    },
+    {
+      "epoch": 0.14581,
+      "grad_norm": 1.511927370026859,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 14581
+    },
+    {
+      "epoch": 0.14582,
+      "grad_norm": 0.9657891579308665,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 14582
+    },
+    {
+      "epoch": 0.14583,
+      "grad_norm": 1.5982287604362448,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 14583
+    },
+    {
+      "epoch": 0.14584,
+      "grad_norm": 1.2063158861095558,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 14584
+    },
+    {
+      "epoch": 0.14585,
+      "grad_norm": 1.3204813971301048,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 14585
+    },
+    {
+      "epoch": 0.14586,
+      "grad_norm": 1.165718809426018,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 14586
+    },
+    {
+      "epoch": 0.14587,
+      "grad_norm": 1.4342957416193656,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 14587
+    },
+    {
+      "epoch": 0.14588,
+      "grad_norm": 1.3097579948327902,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 14588
+    },
+    {
+      "epoch": 0.14589,
+      "grad_norm": 1.1819895455392044,
+      "learning_rate": 0.003,
+      "loss": 3.9819,
+      "step": 14589
+    },
+    {
+      "epoch": 0.1459,
+      "grad_norm": 1.1519733997254258,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 14590
+    },
+    {
+      "epoch": 0.14591,
+      "grad_norm": 1.196795608704991,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 14591
+    },
+    {
+      "epoch": 0.14592,
+      "grad_norm": 1.212206219142562,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 14592
+    },
+    {
+      "epoch": 0.14593,
+      "grad_norm": 1.3748622890900157,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 14593
+    },
+    {
+      "epoch": 0.14594,
+      "grad_norm": 1.1321959038523033,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 14594
+    },
+    {
+      "epoch": 0.14595,
+      "grad_norm": 1.2479704493325805,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 14595
+    },
+    {
+      "epoch": 0.14596,
+      "grad_norm": 1.2157254751435294,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 14596
+    },
+    {
+      "epoch": 0.14597,
+      "grad_norm": 1.2113154403134787,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 14597
+    },
+    {
+      "epoch": 0.14598,
+      "grad_norm": 1.1652249828071028,
+      "learning_rate": 0.003,
+      "loss": 4.0622,
+      "step": 14598
+    },
+    {
+      "epoch": 0.14599,
+      "grad_norm": 1.1249297186797358,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 14599
+    },
+    {
+      "epoch": 0.146,
+      "grad_norm": 1.1161516878373015,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 14600
+    },
+    {
+      "epoch": 0.14601,
+      "grad_norm": 1.2387498558393009,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 14601
+    },
+    {
+      "epoch": 0.14602,
+      "grad_norm": 1.0064218516264403,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 14602
+    },
+    {
+      "epoch": 0.14603,
+      "grad_norm": 1.2928516477312995,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 14603
+    },
+    {
+      "epoch": 0.14604,
+      "grad_norm": 1.2079191749475882,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 14604
+    },
+    {
+      "epoch": 0.14605,
+      "grad_norm": 1.191874828203266,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 14605
+    },
+    {
+      "epoch": 0.14606,
+      "grad_norm": 1.2855761569668036,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 14606
+    },
+    {
+      "epoch": 0.14607,
+      "grad_norm": 1.3084030583655266,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 14607
+    },
+    {
+      "epoch": 0.14608,
+      "grad_norm": 1.0814048860065364,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 14608
+    },
+    {
+      "epoch": 0.14609,
+      "grad_norm": 1.318711360861515,
+      "learning_rate": 0.003,
+      "loss": 3.9784,
+      "step": 14609
+    },
+    {
+      "epoch": 0.1461,
+      "grad_norm": 1.0843550800914612,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 14610
+    },
+    {
+      "epoch": 0.14611,
+      "grad_norm": 1.2146539882263698,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 14611
+    },
+    {
+      "epoch": 0.14612,
+      "grad_norm": 1.3111635235348604,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 14612
+    },
+    {
+      "epoch": 0.14613,
+      "grad_norm": 1.3431111310121864,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 14613
+    },
+    {
+      "epoch": 0.14614,
+      "grad_norm": 1.0556331991470425,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 14614
+    },
+    {
+      "epoch": 0.14615,
+      "grad_norm": 1.3210666923234133,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 14615
+    },
+    {
+      "epoch": 0.14616,
+      "grad_norm": 1.0424732096281708,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 14616
+    },
+    {
+      "epoch": 0.14617,
+      "grad_norm": 1.4222481728984127,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 14617
+    },
+    {
+      "epoch": 0.14618,
+      "grad_norm": 1.1671226113095667,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 14618
+    },
+    {
+      "epoch": 0.14619,
+      "grad_norm": 1.1566156305201265,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 14619
+    },
+    {
+      "epoch": 0.1462,
+      "grad_norm": 1.4435759830205142,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 14620
+    },
+    {
+      "epoch": 0.14621,
+      "grad_norm": 1.0040521628546581,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 14621
+    },
+    {
+      "epoch": 0.14622,
+      "grad_norm": 1.5013433657193513,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 14622
+    },
+    {
+      "epoch": 0.14623,
+      "grad_norm": 1.0041942566980275,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 14623
+    },
+    {
+      "epoch": 0.14624,
+      "grad_norm": 1.2788742166205531,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 14624
+    },
+    {
+      "epoch": 0.14625,
+      "grad_norm": 1.1526320122181346,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 14625
+    },
+    {
+      "epoch": 0.14626,
+      "grad_norm": 1.5960919046817466,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 14626
+    },
+    {
+      "epoch": 0.14627,
+      "grad_norm": 1.0154809815070478,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 14627
+    },
+    {
+      "epoch": 0.14628,
+      "grad_norm": 1.5591770055501386,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 14628
+    },
+    {
+      "epoch": 0.14629,
+      "grad_norm": 1.2433875029595995,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 14629
+    },
+    {
+      "epoch": 0.1463,
+      "grad_norm": 1.0446726464340053,
+      "learning_rate": 0.003,
+      "loss": 4.0549,
+      "step": 14630
+    },
+    {
+      "epoch": 0.14631,
+      "grad_norm": 1.3196216495460884,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 14631
+    },
+    {
+      "epoch": 0.14632,
+      "grad_norm": 1.0795033261710623,
+      "learning_rate": 0.003,
+      "loss": 3.9938,
+      "step": 14632
+    },
+    {
+      "epoch": 0.14633,
+      "grad_norm": 1.3754481147013444,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 14633
+    },
+    {
+      "epoch": 0.14634,
+      "grad_norm": 1.3076650469022453,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 14634
+    },
+    {
+      "epoch": 0.14635,
+      "grad_norm": 1.243730585112518,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 14635
+    },
+    {
+      "epoch": 0.14636,
+      "grad_norm": 1.2090911362187096,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 14636
+    },
+    {
+      "epoch": 0.14637,
+      "grad_norm": 1.0881962337733553,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 14637
+    },
+    {
+      "epoch": 0.14638,
+      "grad_norm": 1.3315979686751431,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 14638
+    },
+    {
+      "epoch": 0.14639,
+      "grad_norm": 1.0951279000513339,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 14639
+    },
+    {
+      "epoch": 0.1464,
+      "grad_norm": 1.2122185434801238,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 14640
+    },
+    {
+      "epoch": 0.14641,
+      "grad_norm": 1.1938786649463586,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 14641
+    },
+    {
+      "epoch": 0.14642,
+      "grad_norm": 1.1415369026720699,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 14642
+    },
+    {
+      "epoch": 0.14643,
+      "grad_norm": 0.9299398623994157,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 14643
+    },
+    {
+      "epoch": 0.14644,
+      "grad_norm": 0.8238908005739586,
+      "learning_rate": 0.003,
+      "loss": 3.9715,
+      "step": 14644
+    },
+    {
+      "epoch": 0.14645,
+      "grad_norm": 1.0600623354228622,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 14645
+    },
+    {
+      "epoch": 0.14646,
+      "grad_norm": 1.4662081180329973,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 14646
+    },
+    {
+      "epoch": 0.14647,
+      "grad_norm": 1.0760830273484745,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 14647
+    },
+    {
+      "epoch": 0.14648,
+      "grad_norm": 1.5618154390340337,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 14648
+    },
+    {
+      "epoch": 0.14649,
+      "grad_norm": 1.029574143897717,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 14649
+    },
+    {
+      "epoch": 0.1465,
+      "grad_norm": 1.3437312355913529,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 14650
+    },
+    {
+      "epoch": 0.14651,
+      "grad_norm": 1.0808326315459529,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 14651
+    },
+    {
+      "epoch": 0.14652,
+      "grad_norm": 1.2761441979796349,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 14652
+    },
+    {
+      "epoch": 0.14653,
+      "grad_norm": 1.132692142274991,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 14653
+    },
+    {
+      "epoch": 0.14654,
+      "grad_norm": 1.3585879621138313,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 14654
+    },
+    {
+      "epoch": 0.14655,
+      "grad_norm": 1.0463229754674632,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 14655
+    },
+    {
+      "epoch": 0.14656,
+      "grad_norm": 1.3918935935980463,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 14656
+    },
+    {
+      "epoch": 0.14657,
+      "grad_norm": 1.0312057243069037,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 14657
+    },
+    {
+      "epoch": 0.14658,
+      "grad_norm": 1.2350850647234748,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 14658
+    },
+    {
+      "epoch": 0.14659,
+      "grad_norm": 1.1383384238061172,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 14659
+    },
+    {
+      "epoch": 0.1466,
+      "grad_norm": 1.1225998777663533,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 14660
+    },
+    {
+      "epoch": 0.14661,
+      "grad_norm": 1.1805257298545155,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 14661
+    },
+    {
+      "epoch": 0.14662,
+      "grad_norm": 0.9972619206686567,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 14662
+    },
+    {
+      "epoch": 0.14663,
+      "grad_norm": 1.3309615460438815,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 14663
+    },
+    {
+      "epoch": 0.14664,
+      "grad_norm": 1.2252668564509699,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 14664
+    },
+    {
+      "epoch": 0.14665,
+      "grad_norm": 1.2211356385365015,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 14665
+    },
+    {
+      "epoch": 0.14666,
+      "grad_norm": 1.248661926118289,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 14666
+    },
+    {
+      "epoch": 0.14667,
+      "grad_norm": 1.039743337030898,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 14667
+    },
+    {
+      "epoch": 0.14668,
+      "grad_norm": 1.3064724626176418,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 14668
+    },
+    {
+      "epoch": 0.14669,
+      "grad_norm": 1.0394125549913507,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 14669
+    },
+    {
+      "epoch": 0.1467,
+      "grad_norm": 1.523471981607469,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 14670
+    },
+    {
+      "epoch": 0.14671,
+      "grad_norm": 1.0238160913510082,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 14671
+    },
+    {
+      "epoch": 0.14672,
+      "grad_norm": 1.24781229708128,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 14672
+    },
+    {
+      "epoch": 0.14673,
+      "grad_norm": 1.1999723508752755,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 14673
+    },
+    {
+      "epoch": 0.14674,
+      "grad_norm": 1.298555433229248,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 14674
+    },
+    {
+      "epoch": 0.14675,
+      "grad_norm": 1.1401112535442113,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 14675
+    },
+    {
+      "epoch": 0.14676,
+      "grad_norm": 1.2317303457429185,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 14676
+    },
+    {
+      "epoch": 0.14677,
+      "grad_norm": 1.1622651650359708,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 14677
+    },
+    {
+      "epoch": 0.14678,
+      "grad_norm": 1.456096924396937,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 14678
+    },
+    {
+      "epoch": 0.14679,
+      "grad_norm": 1.1842110405256305,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 14679
+    },
+    {
+      "epoch": 0.1468,
+      "grad_norm": 1.2673044205478405,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 14680
+    },
+    {
+      "epoch": 0.14681,
+      "grad_norm": 1.2099931664574737,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 14681
+    },
+    {
+      "epoch": 0.14682,
+      "grad_norm": 1.164083021610876,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 14682
+    },
+    {
+      "epoch": 0.14683,
+      "grad_norm": 1.2816239034400825,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 14683
+    },
+    {
+      "epoch": 0.14684,
+      "grad_norm": 1.1309571213745622,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 14684
+    },
+    {
+      "epoch": 0.14685,
+      "grad_norm": 1.598024848121282,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 14685
+    },
+    {
+      "epoch": 0.14686,
+      "grad_norm": 0.9280187236625966,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 14686
+    },
+    {
+      "epoch": 0.14687,
+      "grad_norm": 1.336377928291785,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 14687
+    },
+    {
+      "epoch": 0.14688,
+      "grad_norm": 1.2549502107529549,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 14688
+    },
+    {
+      "epoch": 0.14689,
+      "grad_norm": 1.3464356675156035,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 14689
+    },
+    {
+      "epoch": 0.1469,
+      "grad_norm": 0.9290986244236656,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 14690
+    },
+    {
+      "epoch": 0.14691,
+      "grad_norm": 1.13828097238937,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 14691
+    },
+    {
+      "epoch": 0.14692,
+      "grad_norm": 1.244377214165893,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 14692
+    },
+    {
+      "epoch": 0.14693,
+      "grad_norm": 1.061497537684576,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 14693
+    },
+    {
+      "epoch": 0.14694,
+      "grad_norm": 1.4475906454353726,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 14694
+    },
+    {
+      "epoch": 0.14695,
+      "grad_norm": 1.1298290215779774,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 14695
+    },
+    {
+      "epoch": 0.14696,
+      "grad_norm": 1.5073789409427754,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 14696
+    },
+    {
+      "epoch": 0.14697,
+      "grad_norm": 0.9979149420622156,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 14697
+    },
+    {
+      "epoch": 0.14698,
+      "grad_norm": 1.4211626262696988,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 14698
+    },
+    {
+      "epoch": 0.14699,
+      "grad_norm": 1.2299602276245312,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 14699
+    },
+    {
+      "epoch": 0.147,
+      "grad_norm": 1.103525583737697,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 14700
+    },
+    {
+      "epoch": 0.14701,
+      "grad_norm": 1.5488878701382407,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 14701
+    },
+    {
+      "epoch": 0.14702,
+      "grad_norm": 1.1238259051215245,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 14702
+    },
+    {
+      "epoch": 0.14703,
+      "grad_norm": 1.182223801623779,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 14703
+    },
+    {
+      "epoch": 0.14704,
+      "grad_norm": 1.2729793388297799,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 14704
+    },
+    {
+      "epoch": 0.14705,
+      "grad_norm": 1.113098206652036,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 14705
+    },
+    {
+      "epoch": 0.14706,
+      "grad_norm": 1.0715062674554707,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 14706
+    },
+    {
+      "epoch": 0.14707,
+      "grad_norm": 1.351644327955927,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 14707
+    },
+    {
+      "epoch": 0.14708,
+      "grad_norm": 1.1156719712544556,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 14708
+    },
+    {
+      "epoch": 0.14709,
+      "grad_norm": 1.1937709438439554,
+      "learning_rate": 0.003,
+      "loss": 3.9743,
+      "step": 14709
+    },
+    {
+      "epoch": 0.1471,
+      "grad_norm": 1.4057778479805412,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 14710
+    },
+    {
+      "epoch": 0.14711,
+      "grad_norm": 1.1272384668575615,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 14711
+    },
+    {
+      "epoch": 0.14712,
+      "grad_norm": 1.0489197743851302,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 14712
+    },
+    {
+      "epoch": 0.14713,
+      "grad_norm": 1.2933434063586542,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 14713
+    },
+    {
+      "epoch": 0.14714,
+      "grad_norm": 1.0627743043290785,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 14714
+    },
+    {
+      "epoch": 0.14715,
+      "grad_norm": 1.2665616873727155,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 14715
+    },
+    {
+      "epoch": 0.14716,
+      "grad_norm": 1.1546203680120108,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 14716
+    },
+    {
+      "epoch": 0.14717,
+      "grad_norm": 1.5435870011362298,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 14717
+    },
+    {
+      "epoch": 0.14718,
+      "grad_norm": 1.3015859295181127,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 14718
+    },
+    {
+      "epoch": 0.14719,
+      "grad_norm": 1.1719290752628724,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 14719
+    },
+    {
+      "epoch": 0.1472,
+      "grad_norm": 1.1957700973606493,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 14720
+    },
+    {
+      "epoch": 0.14721,
+      "grad_norm": 1.1863942728845447,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 14721
+    },
+    {
+      "epoch": 0.14722,
+      "grad_norm": 1.1910356355427554,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 14722
+    },
+    {
+      "epoch": 0.14723,
+      "grad_norm": 1.148093174968149,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 14723
+    },
+    {
+      "epoch": 0.14724,
+      "grad_norm": 1.1059163747624803,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 14724
+    },
+    {
+      "epoch": 0.14725,
+      "grad_norm": 1.2193482342947928,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 14725
+    },
+    {
+      "epoch": 0.14726,
+      "grad_norm": 1.2003680766415543,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 14726
+    },
+    {
+      "epoch": 0.14727,
+      "grad_norm": 1.1325797812355616,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 14727
+    },
+    {
+      "epoch": 0.14728,
+      "grad_norm": 1.1920454237282632,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 14728
+    },
+    {
+      "epoch": 0.14729,
+      "grad_norm": 1.1181121359816617,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 14729
+    },
+    {
+      "epoch": 0.1473,
+      "grad_norm": 1.3021322297897138,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 14730
+    },
+    {
+      "epoch": 0.14731,
+      "grad_norm": 1.3341848128938727,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 14731
+    },
+    {
+      "epoch": 0.14732,
+      "grad_norm": 1.1596557785781894,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 14732
+    },
+    {
+      "epoch": 0.14733,
+      "grad_norm": 1.182715193552188,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 14733
+    },
+    {
+      "epoch": 0.14734,
+      "grad_norm": 1.392854241245674,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 14734
+    },
+    {
+      "epoch": 0.14735,
+      "grad_norm": 1.0982923705804346,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 14735
+    },
+    {
+      "epoch": 0.14736,
+      "grad_norm": 1.3453311896029474,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 14736
+    },
+    {
+      "epoch": 0.14737,
+      "grad_norm": 1.1451446830165797,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 14737
+    },
+    {
+      "epoch": 0.14738,
+      "grad_norm": 1.202671897629652,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 14738
+    },
+    {
+      "epoch": 0.14739,
+      "grad_norm": 1.3168548001667901,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 14739
+    },
+    {
+      "epoch": 0.1474,
+      "grad_norm": 1.1757506276149658,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 14740
+    },
+    {
+      "epoch": 0.14741,
+      "grad_norm": 1.4512270939919778,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 14741
+    },
+    {
+      "epoch": 0.14742,
+      "grad_norm": 1.3569803064813937,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 14742
+    },
+    {
+      "epoch": 0.14743,
+      "grad_norm": 1.194524578480824,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 14743
+    },
+    {
+      "epoch": 0.14744,
+      "grad_norm": 1.2610976221389463,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 14744
+    },
+    {
+      "epoch": 0.14745,
+      "grad_norm": 0.9805340364303272,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 14745
+    },
+    {
+      "epoch": 0.14746,
+      "grad_norm": 1.239657032352779,
+      "learning_rate": 0.003,
+      "loss": 3.9832,
+      "step": 14746
+    },
+    {
+      "epoch": 0.14747,
+      "grad_norm": 0.9568751883716193,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 14747
+    },
+    {
+      "epoch": 0.14748,
+      "grad_norm": 1.3422374066034728,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 14748
+    },
+    {
+      "epoch": 0.14749,
+      "grad_norm": 1.2456508296319184,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 14749
+    },
+    {
+      "epoch": 0.1475,
+      "grad_norm": 1.2726613821317687,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 14750
+    },
+    {
+      "epoch": 0.14751,
+      "grad_norm": 1.1803598228688557,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 14751
+    },
+    {
+      "epoch": 0.14752,
+      "grad_norm": 1.3777719414878142,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 14752
+    },
+    {
+      "epoch": 0.14753,
+      "grad_norm": 1.4180466112113057,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 14753
+    },
+    {
+      "epoch": 0.14754,
+      "grad_norm": 0.8278486097470231,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 14754
+    },
+    {
+      "epoch": 0.14755,
+      "grad_norm": 0.9499984355175111,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 14755
+    },
+    {
+      "epoch": 0.14756,
+      "grad_norm": 1.3191873255352862,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 14756
+    },
+    {
+      "epoch": 0.14757,
+      "grad_norm": 1.1279864050152106,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 14757
+    },
+    {
+      "epoch": 0.14758,
+      "grad_norm": 1.112383834697207,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 14758
+    },
+    {
+      "epoch": 0.14759,
+      "grad_norm": 1.1386057827145057,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 14759
+    },
+    {
+      "epoch": 0.1476,
+      "grad_norm": 1.4423913059381863,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 14760
+    },
+    {
+      "epoch": 0.14761,
+      "grad_norm": 1.0955425381374513,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 14761
+    },
+    {
+      "epoch": 0.14762,
+      "grad_norm": 1.4947533994825475,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 14762
+    },
+    {
+      "epoch": 0.14763,
+      "grad_norm": 1.0062595696736336,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 14763
+    },
+    {
+      "epoch": 0.14764,
+      "grad_norm": 1.3722162324539398,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 14764
+    },
+    {
+      "epoch": 0.14765,
+      "grad_norm": 1.201731727510067,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 14765
+    },
+    {
+      "epoch": 0.14766,
+      "grad_norm": 1.3068623774193429,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 14766
+    },
+    {
+      "epoch": 0.14767,
+      "grad_norm": 1.3451901636459578,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 14767
+    },
+    {
+      "epoch": 0.14768,
+      "grad_norm": 1.2390635797654017,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 14768
+    },
+    {
+      "epoch": 0.14769,
+      "grad_norm": 1.2865212311468084,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 14769
+    },
+    {
+      "epoch": 0.1477,
+      "grad_norm": 1.0752257596790689,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 14770
+    },
+    {
+      "epoch": 0.14771,
+      "grad_norm": 1.1960158251847295,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 14771
+    },
+    {
+      "epoch": 0.14772,
+      "grad_norm": 1.3336239932913452,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 14772
+    },
+    {
+      "epoch": 0.14773,
+      "grad_norm": 1.171080291569365,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 14773
+    },
+    {
+      "epoch": 0.14774,
+      "grad_norm": 1.2020832448468304,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 14774
+    },
+    {
+      "epoch": 0.14775,
+      "grad_norm": 1.0974795077534392,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 14775
+    },
+    {
+      "epoch": 0.14776,
+      "grad_norm": 1.319108091993031,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 14776
+    },
+    {
+      "epoch": 0.14777,
+      "grad_norm": 1.24349520064016,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 14777
+    },
+    {
+      "epoch": 0.14778,
+      "grad_norm": 1.3257656870019594,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 14778
+    },
+    {
+      "epoch": 0.14779,
+      "grad_norm": 1.0370219200672475,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 14779
+    },
+    {
+      "epoch": 0.1478,
+      "grad_norm": 1.7092061151710198,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 14780
+    },
+    {
+      "epoch": 0.14781,
+      "grad_norm": 1.1086415995649592,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 14781
+    },
+    {
+      "epoch": 0.14782,
+      "grad_norm": 1.1549386726359416,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 14782
+    },
+    {
+      "epoch": 0.14783,
+      "grad_norm": 1.0851773484666918,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 14783
+    },
+    {
+      "epoch": 0.14784,
+      "grad_norm": 1.3011776728231434,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 14784
+    },
+    {
+      "epoch": 0.14785,
+      "grad_norm": 1.3671877333509035,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 14785
+    },
+    {
+      "epoch": 0.14786,
+      "grad_norm": 1.0755374511587592,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 14786
+    },
+    {
+      "epoch": 0.14787,
+      "grad_norm": 1.2171324500253284,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 14787
+    },
+    {
+      "epoch": 0.14788,
+      "grad_norm": 1.0902739128743564,
+      "learning_rate": 0.003,
+      "loss": 3.9775,
+      "step": 14788
+    },
+    {
+      "epoch": 0.14789,
+      "grad_norm": 1.186812643568498,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 14789
+    },
+    {
+      "epoch": 0.1479,
+      "grad_norm": 1.139087631217558,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 14790
+    },
+    {
+      "epoch": 0.14791,
+      "grad_norm": 1.1221619294261334,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 14791
+    },
+    {
+      "epoch": 0.14792,
+      "grad_norm": 1.3475228213640233,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 14792
+    },
+    {
+      "epoch": 0.14793,
+      "grad_norm": 0.9394196299688248,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 14793
+    },
+    {
+      "epoch": 0.14794,
+      "grad_norm": 1.1896221994674032,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 14794
+    },
+    {
+      "epoch": 0.14795,
+      "grad_norm": 1.3234921820123722,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 14795
+    },
+    {
+      "epoch": 0.14796,
+      "grad_norm": 1.0543878759494105,
+      "learning_rate": 0.003,
+      "loss": 3.9824,
+      "step": 14796
+    },
+    {
+      "epoch": 0.14797,
+      "grad_norm": 1.122857036841386,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 14797
+    },
+    {
+      "epoch": 0.14798,
+      "grad_norm": 1.4259025619320995,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 14798
+    },
+    {
+      "epoch": 0.14799,
+      "grad_norm": 0.9954471138847973,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 14799
+    },
+    {
+      "epoch": 0.148,
+      "grad_norm": 1.3635514739065993,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 14800
+    },
+    {
+      "epoch": 0.14801,
+      "grad_norm": 1.355864793729671,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 14801
+    },
+    {
+      "epoch": 0.14802,
+      "grad_norm": 1.2951051954086201,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 14802
+    },
+    {
+      "epoch": 0.14803,
+      "grad_norm": 1.240531649391423,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 14803
+    },
+    {
+      "epoch": 0.14804,
+      "grad_norm": 1.1882329146439534,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 14804
+    },
+    {
+      "epoch": 0.14805,
+      "grad_norm": 1.0839561759223866,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 14805
+    },
+    {
+      "epoch": 0.14806,
+      "grad_norm": 1.1895202923837271,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 14806
+    },
+    {
+      "epoch": 0.14807,
+      "grad_norm": 1.0984721627105687,
+      "learning_rate": 0.003,
+      "loss": 3.979,
+      "step": 14807
+    },
+    {
+      "epoch": 0.14808,
+      "grad_norm": 1.5108702288765057,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 14808
+    },
+    {
+      "epoch": 0.14809,
+      "grad_norm": 1.0499726722241747,
+      "learning_rate": 0.003,
+      "loss": 3.9808,
+      "step": 14809
+    },
+    {
+      "epoch": 0.1481,
+      "grad_norm": 1.2681284049610142,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 14810
+    },
+    {
+      "epoch": 0.14811,
+      "grad_norm": 1.1083564858942436,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 14811
+    },
+    {
+      "epoch": 0.14812,
+      "grad_norm": 1.2076947134347684,
+      "learning_rate": 0.003,
+      "loss": 3.9659,
+      "step": 14812
+    },
+    {
+      "epoch": 0.14813,
+      "grad_norm": 1.307342413103559,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 14813
+    },
+    {
+      "epoch": 0.14814,
+      "grad_norm": 1.1750548839920394,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 14814
+    },
+    {
+      "epoch": 0.14815,
+      "grad_norm": 1.1184260700584692,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 14815
+    },
+    {
+      "epoch": 0.14816,
+      "grad_norm": 1.2485263669466287,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 14816
+    },
+    {
+      "epoch": 0.14817,
+      "grad_norm": 1.1876209717504822,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 14817
+    },
+    {
+      "epoch": 0.14818,
+      "grad_norm": 1.4994157297009993,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 14818
+    },
+    {
+      "epoch": 0.14819,
+      "grad_norm": 1.3463228432989796,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 14819
+    },
+    {
+      "epoch": 0.1482,
+      "grad_norm": 1.2097675025712753,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 14820
+    },
+    {
+      "epoch": 0.14821,
+      "grad_norm": 1.2459321887205477,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 14821
+    },
+    {
+      "epoch": 0.14822,
+      "grad_norm": 1.145568372480429,
+      "learning_rate": 0.003,
+      "loss": 3.9806,
+      "step": 14822
+    },
+    {
+      "epoch": 0.14823,
+      "grad_norm": 1.4011757991781326,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 14823
+    },
+    {
+      "epoch": 0.14824,
+      "grad_norm": 1.2026590669628292,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 14824
+    },
+    {
+      "epoch": 0.14825,
+      "grad_norm": 1.298554081521772,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 14825
+    },
+    {
+      "epoch": 0.14826,
+      "grad_norm": 1.1783814713327967,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 14826
+    },
+    {
+      "epoch": 0.14827,
+      "grad_norm": 1.4774961433233849,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 14827
+    },
+    {
+      "epoch": 0.14828,
+      "grad_norm": 0.9330859051005298,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 14828
+    },
+    {
+      "epoch": 0.14829,
+      "grad_norm": 1.6178608281109528,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 14829
+    },
+    {
+      "epoch": 0.1483,
+      "grad_norm": 0.9426351181235804,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 14830
+    },
+    {
+      "epoch": 0.14831,
+      "grad_norm": 1.1842062208065405,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 14831
+    },
+    {
+      "epoch": 0.14832,
+      "grad_norm": 1.168425062054937,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 14832
+    },
+    {
+      "epoch": 0.14833,
+      "grad_norm": 1.221455526865599,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 14833
+    },
+    {
+      "epoch": 0.14834,
+      "grad_norm": 1.0884537180758096,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 14834
+    },
+    {
+      "epoch": 0.14835,
+      "grad_norm": 1.342286263334974,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 14835
+    },
+    {
+      "epoch": 0.14836,
+      "grad_norm": 1.220178604711903,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 14836
+    },
+    {
+      "epoch": 0.14837,
+      "grad_norm": 1.2912726229956275,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 14837
+    },
+    {
+      "epoch": 0.14838,
+      "grad_norm": 1.0858951962638583,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 14838
+    },
+    {
+      "epoch": 0.14839,
+      "grad_norm": 1.3854215262077978,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 14839
+    },
+    {
+      "epoch": 0.1484,
+      "grad_norm": 1.3035567966579602,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 14840
+    },
+    {
+      "epoch": 0.14841,
+      "grad_norm": 1.026277131753848,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 14841
+    },
+    {
+      "epoch": 0.14842,
+      "grad_norm": 1.3953361705547809,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 14842
+    },
+    {
+      "epoch": 0.14843,
+      "grad_norm": 1.1548116580406775,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 14843
+    },
+    {
+      "epoch": 0.14844,
+      "grad_norm": 1.1248324434636736,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 14844
+    },
+    {
+      "epoch": 0.14845,
+      "grad_norm": 1.3108243504310484,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 14845
+    },
+    {
+      "epoch": 0.14846,
+      "grad_norm": 1.2222485880266103,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 14846
+    },
+    {
+      "epoch": 0.14847,
+      "grad_norm": 1.1495284914620727,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 14847
+    },
+    {
+      "epoch": 0.14848,
+      "grad_norm": 1.2515593067163204,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 14848
+    },
+    {
+      "epoch": 0.14849,
+      "grad_norm": 1.1804797578470736,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 14849
+    },
+    {
+      "epoch": 0.1485,
+      "grad_norm": 1.167497533650182,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 14850
+    },
+    {
+      "epoch": 0.14851,
+      "grad_norm": 1.052082466564659,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 14851
+    },
+    {
+      "epoch": 0.14852,
+      "grad_norm": 1.4757345167866347,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 14852
+    },
+    {
+      "epoch": 0.14853,
+      "grad_norm": 1.1738947991619102,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 14853
+    },
+    {
+      "epoch": 0.14854,
+      "grad_norm": 1.3960930732542705,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 14854
+    },
+    {
+      "epoch": 0.14855,
+      "grad_norm": 1.1977614959325054,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 14855
+    },
+    {
+      "epoch": 0.14856,
+      "grad_norm": 1.2826098837508362,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 14856
+    },
+    {
+      "epoch": 0.14857,
+      "grad_norm": 1.2771632194462115,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 14857
+    },
+    {
+      "epoch": 0.14858,
+      "grad_norm": 1.2649138401002105,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 14858
+    },
+    {
+      "epoch": 0.14859,
+      "grad_norm": 1.2621133328507186,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 14859
+    },
+    {
+      "epoch": 0.1486,
+      "grad_norm": 1.165204313150529,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 14860
+    },
+    {
+      "epoch": 0.14861,
+      "grad_norm": 1.4595143374205735,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 14861
+    },
+    {
+      "epoch": 0.14862,
+      "grad_norm": 1.313118096897284,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 14862
+    },
+    {
+      "epoch": 0.14863,
+      "grad_norm": 0.9960033788648959,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 14863
+    },
+    {
+      "epoch": 0.14864,
+      "grad_norm": 1.3999675969687877,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 14864
+    },
+    {
+      "epoch": 0.14865,
+      "grad_norm": 0.971526221637175,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 14865
+    },
+    {
+      "epoch": 0.14866,
+      "grad_norm": 1.5885001965903711,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 14866
+    },
+    {
+      "epoch": 0.14867,
+      "grad_norm": 0.8794496752242735,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 14867
+    },
+    {
+      "epoch": 0.14868,
+      "grad_norm": 1.1457341472984988,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 14868
+    },
+    {
+      "epoch": 0.14869,
+      "grad_norm": 1.2460462196880944,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 14869
+    },
+    {
+      "epoch": 0.1487,
+      "grad_norm": 1.3004944633604691,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 14870
+    },
+    {
+      "epoch": 0.14871,
+      "grad_norm": 1.0737833889267785,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 14871
+    },
+    {
+      "epoch": 0.14872,
+      "grad_norm": 1.1905628483988502,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 14872
+    },
+    {
+      "epoch": 0.14873,
+      "grad_norm": 1.2150252779319972,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 14873
+    },
+    {
+      "epoch": 0.14874,
+      "grad_norm": 1.413716137598804,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 14874
+    },
+    {
+      "epoch": 0.14875,
+      "grad_norm": 1.2319027017276702,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 14875
+    },
+    {
+      "epoch": 0.14876,
+      "grad_norm": 1.1590811670007173,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 14876
+    },
+    {
+      "epoch": 0.14877,
+      "grad_norm": 1.1502595888784863,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 14877
+    },
+    {
+      "epoch": 0.14878,
+      "grad_norm": 1.135405812618134,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 14878
+    },
+    {
+      "epoch": 0.14879,
+      "grad_norm": 0.9817877807968799,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 14879
+    },
+    {
+      "epoch": 0.1488,
+      "grad_norm": 1.1457316336220291,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 14880
+    },
+    {
+      "epoch": 0.14881,
+      "grad_norm": 1.120078496043829,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 14881
+    },
+    {
+      "epoch": 0.14882,
+      "grad_norm": 1.3998521551348393,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 14882
+    },
+    {
+      "epoch": 0.14883,
+      "grad_norm": 1.2585695115298041,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 14883
+    },
+    {
+      "epoch": 0.14884,
+      "grad_norm": 1.1596322530573377,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 14884
+    },
+    {
+      "epoch": 0.14885,
+      "grad_norm": 1.2900109792885086,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 14885
+    },
+    {
+      "epoch": 0.14886,
+      "grad_norm": 1.0941650068530546,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 14886
+    },
+    {
+      "epoch": 0.14887,
+      "grad_norm": 1.2701767655262914,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 14887
+    },
+    {
+      "epoch": 0.14888,
+      "grad_norm": 1.3785028945271498,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 14888
+    },
+    {
+      "epoch": 0.14889,
+      "grad_norm": 1.1640558071841207,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 14889
+    },
+    {
+      "epoch": 0.1489,
+      "grad_norm": 1.2350016360468847,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 14890
+    },
+    {
+      "epoch": 0.14891,
+      "grad_norm": 1.1909812740009393,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 14891
+    },
+    {
+      "epoch": 0.14892,
+      "grad_norm": 1.245236229330982,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 14892
+    },
+    {
+      "epoch": 0.14893,
+      "grad_norm": 1.0319952693329486,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 14893
+    },
+    {
+      "epoch": 0.14894,
+      "grad_norm": 1.404395060703985,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 14894
+    },
+    {
+      "epoch": 0.14895,
+      "grad_norm": 1.3312321228312276,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 14895
+    },
+    {
+      "epoch": 0.14896,
+      "grad_norm": 1.4031247593198042,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 14896
+    },
+    {
+      "epoch": 0.14897,
+      "grad_norm": 0.9251746593633249,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 14897
+    },
+    {
+      "epoch": 0.14898,
+      "grad_norm": 1.1089796838097807,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 14898
+    },
+    {
+      "epoch": 0.14899,
+      "grad_norm": 1.1160083080503167,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 14899
+    },
+    {
+      "epoch": 0.149,
+      "grad_norm": 1.3488201450168495,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 14900
+    },
+    {
+      "epoch": 0.14901,
+      "grad_norm": 1.4817746969596457,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 14901
+    },
+    {
+      "epoch": 0.14902,
+      "grad_norm": 0.9974601034053736,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 14902
+    },
+    {
+      "epoch": 0.14903,
+      "grad_norm": 1.5739684402092466,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 14903
+    },
+    {
+      "epoch": 0.14904,
+      "grad_norm": 1.2032902211761658,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 14904
+    },
+    {
+      "epoch": 0.14905,
+      "grad_norm": 1.1779051842351325,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 14905
+    },
+    {
+      "epoch": 0.14906,
+      "grad_norm": 1.304753964643637,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 14906
+    },
+    {
+      "epoch": 0.14907,
+      "grad_norm": 1.1017996705146436,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 14907
+    },
+    {
+      "epoch": 0.14908,
+      "grad_norm": 1.413055102243228,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 14908
+    },
+    {
+      "epoch": 0.14909,
+      "grad_norm": 1.1993242494244376,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 14909
+    },
+    {
+      "epoch": 0.1491,
+      "grad_norm": 1.3960283841416354,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 14910
+    },
+    {
+      "epoch": 0.14911,
+      "grad_norm": 0.9131043857587414,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 14911
+    },
+    {
+      "epoch": 0.14912,
+      "grad_norm": 1.251331191866348,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 14912
+    },
+    {
+      "epoch": 0.14913,
+      "grad_norm": 1.2244501109261372,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 14913
+    },
+    {
+      "epoch": 0.14914,
+      "grad_norm": 1.2586212474739633,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 14914
+    },
+    {
+      "epoch": 0.14915,
+      "grad_norm": 1.0999447784539422,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 14915
+    },
+    {
+      "epoch": 0.14916,
+      "grad_norm": 1.5530642038910838,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 14916
+    },
+    {
+      "epoch": 0.14917,
+      "grad_norm": 1.0477200805793776,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 14917
+    },
+    {
+      "epoch": 0.14918,
+      "grad_norm": 1.4866370403962696,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 14918
+    },
+    {
+      "epoch": 0.14919,
+      "grad_norm": 1.1401361636789986,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 14919
+    },
+    {
+      "epoch": 0.1492,
+      "grad_norm": 1.3919223969013914,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 14920
+    },
+    {
+      "epoch": 0.14921,
+      "grad_norm": 1.2378662180360431,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 14921
+    },
+    {
+      "epoch": 0.14922,
+      "grad_norm": 1.3964563554545333,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 14922
+    },
+    {
+      "epoch": 0.14923,
+      "grad_norm": 1.3434961179490488,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 14923
+    },
+    {
+      "epoch": 0.14924,
+      "grad_norm": 1.1174456729435764,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 14924
+    },
+    {
+      "epoch": 0.14925,
+      "grad_norm": 1.3257139886781153,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 14925
+    },
+    {
+      "epoch": 0.14926,
+      "grad_norm": 1.1451098945090743,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 14926
+    },
+    {
+      "epoch": 0.14927,
+      "grad_norm": 1.460942215878829,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 14927
+    },
+    {
+      "epoch": 0.14928,
+      "grad_norm": 1.0171135357041543,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 14928
+    },
+    {
+      "epoch": 0.14929,
+      "grad_norm": 1.2490868086048281,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 14929
+    },
+    {
+      "epoch": 0.1493,
+      "grad_norm": 0.9820383576630475,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 14930
+    },
+    {
+      "epoch": 0.14931,
+      "grad_norm": 1.4240111064662544,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 14931
+    },
+    {
+      "epoch": 0.14932,
+      "grad_norm": 1.0312709532553408,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 14932
+    },
+    {
+      "epoch": 0.14933,
+      "grad_norm": 1.3033329876881157,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 14933
+    },
+    {
+      "epoch": 0.14934,
+      "grad_norm": 1.1022488175504013,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 14934
+    },
+    {
+      "epoch": 0.14935,
+      "grad_norm": 1.259008325842684,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 14935
+    },
+    {
+      "epoch": 0.14936,
+      "grad_norm": 1.2953376945757966,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 14936
+    },
+    {
+      "epoch": 0.14937,
+      "grad_norm": 0.8954211650418383,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 14937
+    },
+    {
+      "epoch": 0.14938,
+      "grad_norm": 1.108314327570641,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 14938
+    },
+    {
+      "epoch": 0.14939,
+      "grad_norm": 1.5431478241610768,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 14939
+    },
+    {
+      "epoch": 0.1494,
+      "grad_norm": 0.9261299070131537,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 14940
+    },
+    {
+      "epoch": 0.14941,
+      "grad_norm": 1.3487899537436916,
+      "learning_rate": 0.003,
+      "loss": 4.0606,
+      "step": 14941
+    },
+    {
+      "epoch": 0.14942,
+      "grad_norm": 1.0720868304846356,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 14942
+    },
+    {
+      "epoch": 0.14943,
+      "grad_norm": 1.1356899271035579,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 14943
+    },
+    {
+      "epoch": 0.14944,
+      "grad_norm": 1.159036689495631,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 14944
+    },
+    {
+      "epoch": 0.14945,
+      "grad_norm": 1.294910603968105,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 14945
+    },
+    {
+      "epoch": 0.14946,
+      "grad_norm": 1.2310211532261401,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 14946
+    },
+    {
+      "epoch": 0.14947,
+      "grad_norm": 1.147713769303728,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 14947
+    },
+    {
+      "epoch": 0.14948,
+      "grad_norm": 1.230167779841433,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 14948
+    },
+    {
+      "epoch": 0.14949,
+      "grad_norm": 1.2306050786117049,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 14949
+    },
+    {
+      "epoch": 0.1495,
+      "grad_norm": 1.3135866891956622,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 14950
+    },
+    {
+      "epoch": 0.14951,
+      "grad_norm": 1.3623435968877635,
+      "learning_rate": 0.003,
+      "loss": 3.9811,
+      "step": 14951
+    },
+    {
+      "epoch": 0.14952,
+      "grad_norm": 1.152662938484965,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 14952
+    },
+    {
+      "epoch": 0.14953,
+      "grad_norm": 1.289088875479219,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 14953
+    },
+    {
+      "epoch": 0.14954,
+      "grad_norm": 1.1215923060881257,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 14954
+    },
+    {
+      "epoch": 0.14955,
+      "grad_norm": 1.3221633165675728,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 14955
+    },
+    {
+      "epoch": 0.14956,
+      "grad_norm": 0.9652269514638793,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 14956
+    },
+    {
+      "epoch": 0.14957,
+      "grad_norm": 1.2836725479916864,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 14957
+    },
+    {
+      "epoch": 0.14958,
+      "grad_norm": 1.222776077178544,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 14958
+    },
+    {
+      "epoch": 0.14959,
+      "grad_norm": 1.3037456196499888,
+      "learning_rate": 0.003,
+      "loss": 3.9816,
+      "step": 14959
+    },
+    {
+      "epoch": 0.1496,
+      "grad_norm": 1.2338554444148002,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 14960
+    },
+    {
+      "epoch": 0.14961,
+      "grad_norm": 1.057582098078062,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 14961
+    },
+    {
+      "epoch": 0.14962,
+      "grad_norm": 1.2566371433084496,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 14962
+    },
+    {
+      "epoch": 0.14963,
+      "grad_norm": 1.2417135314919132,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 14963
+    },
+    {
+      "epoch": 0.14964,
+      "grad_norm": 1.240855453420091,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 14964
+    },
+    {
+      "epoch": 0.14965,
+      "grad_norm": 1.2042763426088086,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 14965
+    },
+    {
+      "epoch": 0.14966,
+      "grad_norm": 1.4578607611652201,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 14966
+    },
+    {
+      "epoch": 0.14967,
+      "grad_norm": 1.173609245954742,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 14967
+    },
+    {
+      "epoch": 0.14968,
+      "grad_norm": 1.0947873844270783,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 14968
+    },
+    {
+      "epoch": 0.14969,
+      "grad_norm": 1.3683915784044167,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 14969
+    },
+    {
+      "epoch": 0.1497,
+      "grad_norm": 1.2199880471450995,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 14970
+    },
+    {
+      "epoch": 0.14971,
+      "grad_norm": 1.309851887266059,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 14971
+    },
+    {
+      "epoch": 0.14972,
+      "grad_norm": 1.1758886914043591,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 14972
+    },
+    {
+      "epoch": 0.14973,
+      "grad_norm": 1.3609476281993964,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 14973
+    },
+    {
+      "epoch": 0.14974,
+      "grad_norm": 0.9816117678678208,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 14974
+    },
+    {
+      "epoch": 0.14975,
+      "grad_norm": 1.4053278904134376,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 14975
+    },
+    {
+      "epoch": 0.14976,
+      "grad_norm": 1.3177315280510327,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 14976
+    },
+    {
+      "epoch": 0.14977,
+      "grad_norm": 1.321177287660058,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 14977
+    },
+    {
+      "epoch": 0.14978,
+      "grad_norm": 1.1995771594106088,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 14978
+    },
+    {
+      "epoch": 0.14979,
+      "grad_norm": 1.1852421784022913,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 14979
+    },
+    {
+      "epoch": 0.1498,
+      "grad_norm": 1.0344875601882786,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 14980
+    },
+    {
+      "epoch": 0.14981,
+      "grad_norm": 1.218236211138091,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 14981
+    },
+    {
+      "epoch": 0.14982,
+      "grad_norm": 1.0531872899673196,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 14982
+    },
+    {
+      "epoch": 0.14983,
+      "grad_norm": 1.3701361125306657,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 14983
+    },
+    {
+      "epoch": 0.14984,
+      "grad_norm": 1.1561564936517743,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 14984
+    },
+    {
+      "epoch": 0.14985,
+      "grad_norm": 1.2203615274256243,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 14985
+    },
+    {
+      "epoch": 0.14986,
+      "grad_norm": 1.3589539058043363,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 14986
+    },
+    {
+      "epoch": 0.14987,
+      "grad_norm": 1.007262450095289,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 14987
+    },
+    {
+      "epoch": 0.14988,
+      "grad_norm": 1.4869886265733574,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 14988
+    },
+    {
+      "epoch": 0.14989,
+      "grad_norm": 1.0009057794425513,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 14989
+    },
+    {
+      "epoch": 0.1499,
+      "grad_norm": 1.389189092010671,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 14990
+    },
+    {
+      "epoch": 0.14991,
+      "grad_norm": 1.152524627524547,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 14991
+    },
+    {
+      "epoch": 0.14992,
+      "grad_norm": 1.1099450346476132,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 14992
+    },
+    {
+      "epoch": 0.14993,
+      "grad_norm": 1.3414137567732916,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 14993
+    },
+    {
+      "epoch": 0.14994,
+      "grad_norm": 1.1093458689260562,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 14994
+    },
+    {
+      "epoch": 0.14995,
+      "grad_norm": 1.3059990531641175,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 14995
+    },
+    {
+      "epoch": 0.14996,
+      "grad_norm": 1.2710449589157764,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 14996
+    },
+    {
+      "epoch": 0.14997,
+      "grad_norm": 1.324001036249548,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 14997
+    },
+    {
+      "epoch": 0.14998,
+      "grad_norm": 1.5132413834470855,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 14998
+    },
+    {
+      "epoch": 0.14999,
+      "grad_norm": 1.1022008116316366,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 14999
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 1.2511547133707417,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 15000
+    },
+    {
+      "epoch": 0.15001,
+      "grad_norm": 1.0834107812897231,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 15001
+    },
+    {
+      "epoch": 0.15002,
+      "grad_norm": 1.4583003026965216,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 15002
+    },
+    {
+      "epoch": 0.15003,
+      "grad_norm": 1.0164623980748966,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 15003
+    },
+    {
+      "epoch": 0.15004,
+      "grad_norm": 1.343262825216341,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 15004
+    },
+    {
+      "epoch": 0.15005,
+      "grad_norm": 1.1370870689830004,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 15005
+    },
+    {
+      "epoch": 0.15006,
+      "grad_norm": 1.3223233899220281,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 15006
+    },
+    {
+      "epoch": 0.15007,
+      "grad_norm": 1.0370331143992404,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 15007
+    },
+    {
+      "epoch": 0.15008,
+      "grad_norm": 1.4445294117832153,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 15008
+    },
+    {
+      "epoch": 0.15009,
+      "grad_norm": 1.3518029276829822,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 15009
+    },
+    {
+      "epoch": 0.1501,
+      "grad_norm": 1.2309957046300406,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 15010
+    },
+    {
+      "epoch": 0.15011,
+      "grad_norm": 1.27589356901691,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 15011
+    },
+    {
+      "epoch": 0.15012,
+      "grad_norm": 1.2144644609278672,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 15012
+    },
+    {
+      "epoch": 0.15013,
+      "grad_norm": 1.1501449654053362,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 15013
+    },
+    {
+      "epoch": 0.15014,
+      "grad_norm": 1.6288277390725783,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 15014
+    },
+    {
+      "epoch": 0.15015,
+      "grad_norm": 0.9856979647827699,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 15015
+    },
+    {
+      "epoch": 0.15016,
+      "grad_norm": 1.4520037698240609,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 15016
+    },
+    {
+      "epoch": 0.15017,
+      "grad_norm": 1.040970103974602,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 15017
+    },
+    {
+      "epoch": 0.15018,
+      "grad_norm": 1.2634763102058675,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 15018
+    },
+    {
+      "epoch": 0.15019,
+      "grad_norm": 1.0772080110580602,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 15019
+    },
+    {
+      "epoch": 0.1502,
+      "grad_norm": 1.2687453816983203,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 15020
+    },
+    {
+      "epoch": 0.15021,
+      "grad_norm": 1.2614304451140905,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 15021
+    },
+    {
+      "epoch": 0.15022,
+      "grad_norm": 1.176662629932817,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 15022
+    },
+    {
+      "epoch": 0.15023,
+      "grad_norm": 1.3364690597724231,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 15023
+    },
+    {
+      "epoch": 0.15024,
+      "grad_norm": 0.9801971142283072,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 15024
+    },
+    {
+      "epoch": 0.15025,
+      "grad_norm": 1.290867464177766,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 15025
+    },
+    {
+      "epoch": 0.15026,
+      "grad_norm": 1.5256383410336505,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 15026
+    },
+    {
+      "epoch": 0.15027,
+      "grad_norm": 0.9554157287977645,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 15027
+    },
+    {
+      "epoch": 0.15028,
+      "grad_norm": 1.2344367686432,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 15028
+    },
+    {
+      "epoch": 0.15029,
+      "grad_norm": 1.058874736871547,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 15029
+    },
+    {
+      "epoch": 0.1503,
+      "grad_norm": 1.4628155784648897,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 15030
+    },
+    {
+      "epoch": 0.15031,
+      "grad_norm": 1.000440083099698,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 15031
+    },
+    {
+      "epoch": 0.15032,
+      "grad_norm": 1.6221985468900189,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 15032
+    },
+    {
+      "epoch": 0.15033,
+      "grad_norm": 1.0593767482313574,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 15033
+    },
+    {
+      "epoch": 0.15034,
+      "grad_norm": 1.3033259842482872,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 15034
+    },
+    {
+      "epoch": 0.15035,
+      "grad_norm": 1.271582243810947,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 15035
+    },
+    {
+      "epoch": 0.15036,
+      "grad_norm": 1.2504946374764305,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 15036
+    },
+    {
+      "epoch": 0.15037,
+      "grad_norm": 1.1392408868055681,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 15037
+    },
+    {
+      "epoch": 0.15038,
+      "grad_norm": 1.2827374034740893,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 15038
+    },
+    {
+      "epoch": 0.15039,
+      "grad_norm": 1.097446245620214,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 15039
+    },
+    {
+      "epoch": 0.1504,
+      "grad_norm": 1.3790289652472876,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 15040
+    },
+    {
+      "epoch": 0.15041,
+      "grad_norm": 1.2343541743317246,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 15041
+    },
+    {
+      "epoch": 0.15042,
+      "grad_norm": 1.3551423323541314,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 15042
+    },
+    {
+      "epoch": 0.15043,
+      "grad_norm": 1.0136000020055114,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 15043
+    },
+    {
+      "epoch": 0.15044,
+      "grad_norm": 1.386402889011217,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 15044
+    },
+    {
+      "epoch": 0.15045,
+      "grad_norm": 1.182671672958449,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 15045
+    },
+    {
+      "epoch": 0.15046,
+      "grad_norm": 1.1943280336924613,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 15046
+    },
+    {
+      "epoch": 0.15047,
+      "grad_norm": 1.067087083299192,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 15047
+    },
+    {
+      "epoch": 0.15048,
+      "grad_norm": 1.2101321504804678,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 15048
+    },
+    {
+      "epoch": 0.15049,
+      "grad_norm": 1.1918369767211583,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 15049
+    },
+    {
+      "epoch": 0.1505,
+      "grad_norm": 1.1865037174439503,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 15050
+    },
+    {
+      "epoch": 0.15051,
+      "grad_norm": 1.307294713323573,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 15051
+    },
+    {
+      "epoch": 0.15052,
+      "grad_norm": 1.0843302266527004,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 15052
+    },
+    {
+      "epoch": 0.15053,
+      "grad_norm": 1.368616511764391,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 15053
+    },
+    {
+      "epoch": 0.15054,
+      "grad_norm": 1.1503707138668346,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 15054
+    },
+    {
+      "epoch": 0.15055,
+      "grad_norm": 1.24232930332824,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 15055
+    },
+    {
+      "epoch": 0.15056,
+      "grad_norm": 1.2210548505793515,
+      "learning_rate": 0.003,
+      "loss": 3.9721,
+      "step": 15056
+    },
+    {
+      "epoch": 0.15057,
+      "grad_norm": 1.2804783401997903,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 15057
+    },
+    {
+      "epoch": 0.15058,
+      "grad_norm": 1.1121947561924939,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 15058
+    },
+    {
+      "epoch": 0.15059,
+      "grad_norm": 1.2666290669327305,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 15059
+    },
+    {
+      "epoch": 0.1506,
+      "grad_norm": 1.1033006586389937,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 15060
+    },
+    {
+      "epoch": 0.15061,
+      "grad_norm": 1.3958301674075322,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 15061
+    },
+    {
+      "epoch": 0.15062,
+      "grad_norm": 1.1571985815771526,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 15062
+    },
+    {
+      "epoch": 0.15063,
+      "grad_norm": 1.1693679449700813,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 15063
+    },
+    {
+      "epoch": 0.15064,
+      "grad_norm": 1.479241432093687,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 15064
+    },
+    {
+      "epoch": 0.15065,
+      "grad_norm": 1.2611691539565617,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 15065
+    },
+    {
+      "epoch": 0.15066,
+      "grad_norm": 1.354519508848747,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 15066
+    },
+    {
+      "epoch": 0.15067,
+      "grad_norm": 1.1264546163898423,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 15067
+    },
+    {
+      "epoch": 0.15068,
+      "grad_norm": 1.0276693408258029,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 15068
+    },
+    {
+      "epoch": 0.15069,
+      "grad_norm": 1.2274172626033997,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 15069
+    },
+    {
+      "epoch": 0.1507,
+      "grad_norm": 1.104144935418708,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 15070
+    },
+    {
+      "epoch": 0.15071,
+      "grad_norm": 1.3419237823732055,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 15071
+    },
+    {
+      "epoch": 0.15072,
+      "grad_norm": 1.3650899614680156,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 15072
+    },
+    {
+      "epoch": 0.15073,
+      "grad_norm": 1.091256776156269,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 15073
+    },
+    {
+      "epoch": 0.15074,
+      "grad_norm": 1.1837415652261045,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 15074
+    },
+    {
+      "epoch": 0.15075,
+      "grad_norm": 1.070345352497782,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 15075
+    },
+    {
+      "epoch": 0.15076,
+      "grad_norm": 1.3670014269058248,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 15076
+    },
+    {
+      "epoch": 0.15077,
+      "grad_norm": 1.049884830296458,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 15077
+    },
+    {
+      "epoch": 0.15078,
+      "grad_norm": 1.3611626126884704,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 15078
+    },
+    {
+      "epoch": 0.15079,
+      "grad_norm": 1.049809942194643,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 15079
+    },
+    {
+      "epoch": 0.1508,
+      "grad_norm": 1.4038742234145964,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 15080
+    },
+    {
+      "epoch": 0.15081,
+      "grad_norm": 1.060595492700757,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 15081
+    },
+    {
+      "epoch": 0.15082,
+      "grad_norm": 1.3183300499048776,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 15082
+    },
+    {
+      "epoch": 0.15083,
+      "grad_norm": 1.076680798390479,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 15083
+    },
+    {
+      "epoch": 0.15084,
+      "grad_norm": 1.357153988614293,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 15084
+    },
+    {
+      "epoch": 0.15085,
+      "grad_norm": 1.0042635001063007,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 15085
+    },
+    {
+      "epoch": 0.15086,
+      "grad_norm": 1.3771181358622575,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 15086
+    },
+    {
+      "epoch": 0.15087,
+      "grad_norm": 1.147283702797723,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 15087
+    },
+    {
+      "epoch": 0.15088,
+      "grad_norm": 1.2341217391719035,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 15088
+    },
+    {
+      "epoch": 0.15089,
+      "grad_norm": 1.3516384833027888,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 15089
+    },
+    {
+      "epoch": 0.1509,
+      "grad_norm": 1.2292463982304918,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 15090
+    },
+    {
+      "epoch": 0.15091,
+      "grad_norm": 1.3564703702419483,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 15091
+    },
+    {
+      "epoch": 0.15092,
+      "grad_norm": 1.112792931335097,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 15092
+    },
+    {
+      "epoch": 0.15093,
+      "grad_norm": 1.3136370078155426,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 15093
+    },
+    {
+      "epoch": 0.15094,
+      "grad_norm": 1.221216042533791,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 15094
+    },
+    {
+      "epoch": 0.15095,
+      "grad_norm": 1.2145692979169989,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 15095
+    },
+    {
+      "epoch": 0.15096,
+      "grad_norm": 1.3465204947883336,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 15096
+    },
+    {
+      "epoch": 0.15097,
+      "grad_norm": 0.9430128887268873,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 15097
+    },
+    {
+      "epoch": 0.15098,
+      "grad_norm": 1.2901837174572874,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 15098
+    },
+    {
+      "epoch": 0.15099,
+      "grad_norm": 1.165445295507976,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 15099
+    },
+    {
+      "epoch": 0.151,
+      "grad_norm": 1.3232825503918688,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 15100
+    },
+    {
+      "epoch": 0.15101,
+      "grad_norm": 1.132837865211714,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 15101
+    },
+    {
+      "epoch": 0.15102,
+      "grad_norm": 1.241982532953572,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 15102
+    },
+    {
+      "epoch": 0.15103,
+      "grad_norm": 1.2383420097536164,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 15103
+    },
+    {
+      "epoch": 0.15104,
+      "grad_norm": 1.262949064967333,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 15104
+    },
+    {
+      "epoch": 0.15105,
+      "grad_norm": 1.513535550351292,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 15105
+    },
+    {
+      "epoch": 0.15106,
+      "grad_norm": 1.257750614192443,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 15106
+    },
+    {
+      "epoch": 0.15107,
+      "grad_norm": 1.116147918666188,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 15107
+    },
+    {
+      "epoch": 0.15108,
+      "grad_norm": 1.189241053979111,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 15108
+    },
+    {
+      "epoch": 0.15109,
+      "grad_norm": 1.1349573681696845,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 15109
+    },
+    {
+      "epoch": 0.1511,
+      "grad_norm": 1.272700322893393,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 15110
+    },
+    {
+      "epoch": 0.15111,
+      "grad_norm": 1.2802884196351139,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 15111
+    },
+    {
+      "epoch": 0.15112,
+      "grad_norm": 1.2370215688402926,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 15112
+    },
+    {
+      "epoch": 0.15113,
+      "grad_norm": 1.2016811206691678,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 15113
+    },
+    {
+      "epoch": 0.15114,
+      "grad_norm": 1.1700164562301176,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 15114
+    },
+    {
+      "epoch": 0.15115,
+      "grad_norm": 1.2068934294145686,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 15115
+    },
+    {
+      "epoch": 0.15116,
+      "grad_norm": 1.1332474111238924,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 15116
+    },
+    {
+      "epoch": 0.15117,
+      "grad_norm": 1.2410907164496658,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 15117
+    },
+    {
+      "epoch": 0.15118,
+      "grad_norm": 1.273940139764871,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 15118
+    },
+    {
+      "epoch": 0.15119,
+      "grad_norm": 1.367244553089254,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 15119
+    },
+    {
+      "epoch": 0.1512,
+      "grad_norm": 1.2379281616283513,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 15120
+    },
+    {
+      "epoch": 0.15121,
+      "grad_norm": 1.4274532742144836,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 15121
+    },
+    {
+      "epoch": 0.15122,
+      "grad_norm": 0.8765441414346433,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 15122
+    },
+    {
+      "epoch": 0.15123,
+      "grad_norm": 1.0869158342956753,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 15123
+    },
+    {
+      "epoch": 0.15124,
+      "grad_norm": 1.3050167137936268,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 15124
+    },
+    {
+      "epoch": 0.15125,
+      "grad_norm": 1.1404648288786916,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 15125
+    },
+    {
+      "epoch": 0.15126,
+      "grad_norm": 1.4148215055467332,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 15126
+    },
+    {
+      "epoch": 0.15127,
+      "grad_norm": 1.1147866554659929,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 15127
+    },
+    {
+      "epoch": 0.15128,
+      "grad_norm": 1.2610087486392272,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 15128
+    },
+    {
+      "epoch": 0.15129,
+      "grad_norm": 0.9286406884499967,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 15129
+    },
+    {
+      "epoch": 0.1513,
+      "grad_norm": 1.2832639151976415,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 15130
+    },
+    {
+      "epoch": 0.15131,
+      "grad_norm": 1.0621449933845621,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 15131
+    },
+    {
+      "epoch": 0.15132,
+      "grad_norm": 1.4334878759851153,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 15132
+    },
+    {
+      "epoch": 0.15133,
+      "grad_norm": 1.3105529744462483,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 15133
+    },
+    {
+      "epoch": 0.15134,
+      "grad_norm": 1.0549616111045212,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 15134
+    },
+    {
+      "epoch": 0.15135,
+      "grad_norm": 1.286201041219918,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 15135
+    },
+    {
+      "epoch": 0.15136,
+      "grad_norm": 1.2613815297065765,
+      "learning_rate": 0.003,
+      "loss": 3.9821,
+      "step": 15136
+    },
+    {
+      "epoch": 0.15137,
+      "grad_norm": 1.3049514329532583,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 15137
+    },
+    {
+      "epoch": 0.15138,
+      "grad_norm": 1.2523480185780507,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 15138
+    },
+    {
+      "epoch": 0.15139,
+      "grad_norm": 1.6897379703811235,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 15139
+    },
+    {
+      "epoch": 0.1514,
+      "grad_norm": 1.2114299946762168,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 15140
+    },
+    {
+      "epoch": 0.15141,
+      "grad_norm": 1.3552575020562225,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 15141
+    },
+    {
+      "epoch": 0.15142,
+      "grad_norm": 1.0988214288233396,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 15142
+    },
+    {
+      "epoch": 0.15143,
+      "grad_norm": 1.400017500835591,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 15143
+    },
+    {
+      "epoch": 0.15144,
+      "grad_norm": 1.294718444209177,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 15144
+    },
+    {
+      "epoch": 0.15145,
+      "grad_norm": 1.264073306861436,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 15145
+    },
+    {
+      "epoch": 0.15146,
+      "grad_norm": 1.147161227195975,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 15146
+    },
+    {
+      "epoch": 0.15147,
+      "grad_norm": 1.4421115859568234,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 15147
+    },
+    {
+      "epoch": 0.15148,
+      "grad_norm": 1.0578395740545363,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 15148
+    },
+    {
+      "epoch": 0.15149,
+      "grad_norm": 1.2865955220632557,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 15149
+    },
+    {
+      "epoch": 0.1515,
+      "grad_norm": 1.021024571309487,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 15150
+    },
+    {
+      "epoch": 0.15151,
+      "grad_norm": 1.5256734661903013,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 15151
+    },
+    {
+      "epoch": 0.15152,
+      "grad_norm": 1.074404522380423,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 15152
+    },
+    {
+      "epoch": 0.15153,
+      "grad_norm": 1.3833997517816128,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 15153
+    },
+    {
+      "epoch": 0.15154,
+      "grad_norm": 1.0423269119897456,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 15154
+    },
+    {
+      "epoch": 0.15155,
+      "grad_norm": 1.2210503757117748,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 15155
+    },
+    {
+      "epoch": 0.15156,
+      "grad_norm": 1.108145226130091,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 15156
+    },
+    {
+      "epoch": 0.15157,
+      "grad_norm": 1.2422893744328773,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 15157
+    },
+    {
+      "epoch": 0.15158,
+      "grad_norm": 1.1585359463147444,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 15158
+    },
+    {
+      "epoch": 0.15159,
+      "grad_norm": 1.3052493913554555,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 15159
+    },
+    {
+      "epoch": 0.1516,
+      "grad_norm": 1.0391936207685226,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 15160
+    },
+    {
+      "epoch": 0.15161,
+      "grad_norm": 1.3719432821989905,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 15161
+    },
+    {
+      "epoch": 0.15162,
+      "grad_norm": 1.2060382045199585,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 15162
+    },
+    {
+      "epoch": 0.15163,
+      "grad_norm": 1.2938521061808144,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 15163
+    },
+    {
+      "epoch": 0.15164,
+      "grad_norm": 1.2728919704370556,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 15164
+    },
+    {
+      "epoch": 0.15165,
+      "grad_norm": 1.2976291980902035,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 15165
+    },
+    {
+      "epoch": 0.15166,
+      "grad_norm": 1.1318885985271303,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 15166
+    },
+    {
+      "epoch": 0.15167,
+      "grad_norm": 1.1218973473108103,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 15167
+    },
+    {
+      "epoch": 0.15168,
+      "grad_norm": 1.2558430257149893,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 15168
+    },
+    {
+      "epoch": 0.15169,
+      "grad_norm": 1.1632066368507694,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 15169
+    },
+    {
+      "epoch": 0.1517,
+      "grad_norm": 1.512747349596697,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 15170
+    },
+    {
+      "epoch": 0.15171,
+      "grad_norm": 1.1607688250338546,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 15171
+    },
+    {
+      "epoch": 0.15172,
+      "grad_norm": 1.5669471842154912,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 15172
+    },
+    {
+      "epoch": 0.15173,
+      "grad_norm": 0.9690746984375851,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 15173
+    },
+    {
+      "epoch": 0.15174,
+      "grad_norm": 1.424525009288961,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 15174
+    },
+    {
+      "epoch": 0.15175,
+      "grad_norm": 1.0884188106452057,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 15175
+    },
+    {
+      "epoch": 0.15176,
+      "grad_norm": 1.3406032412403708,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 15176
+    },
+    {
+      "epoch": 0.15177,
+      "grad_norm": 1.2010227603841026,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 15177
+    },
+    {
+      "epoch": 0.15178,
+      "grad_norm": 1.1403832764384894,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 15178
+    },
+    {
+      "epoch": 0.15179,
+      "grad_norm": 1.3656623173642093,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 15179
+    },
+    {
+      "epoch": 0.1518,
+      "grad_norm": 0.9446342092378506,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 15180
+    },
+    {
+      "epoch": 0.15181,
+      "grad_norm": 1.1632519133270702,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 15181
+    },
+    {
+      "epoch": 0.15182,
+      "grad_norm": 1.2515943146416864,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 15182
+    },
+    {
+      "epoch": 0.15183,
+      "grad_norm": 1.3866824702413552,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 15183
+    },
+    {
+      "epoch": 0.15184,
+      "grad_norm": 1.1669861813371163,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 15184
+    },
+    {
+      "epoch": 0.15185,
+      "grad_norm": 1.1595091466120568,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 15185
+    },
+    {
+      "epoch": 0.15186,
+      "grad_norm": 1.3859410529014762,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 15186
+    },
+    {
+      "epoch": 0.15187,
+      "grad_norm": 1.3425423501975218,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 15187
+    },
+    {
+      "epoch": 0.15188,
+      "grad_norm": 1.0608433290080848,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 15188
+    },
+    {
+      "epoch": 0.15189,
+      "grad_norm": 1.0314082381097578,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 15189
+    },
+    {
+      "epoch": 0.1519,
+      "grad_norm": 1.2723553619786312,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 15190
+    },
+    {
+      "epoch": 0.15191,
+      "grad_norm": 1.142633684616142,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 15191
+    },
+    {
+      "epoch": 0.15192,
+      "grad_norm": 1.1497651283192816,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 15192
+    },
+    {
+      "epoch": 0.15193,
+      "grad_norm": 1.141694588201836,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 15193
+    },
+    {
+      "epoch": 0.15194,
+      "grad_norm": 1.3196754641868844,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 15194
+    },
+    {
+      "epoch": 0.15195,
+      "grad_norm": 1.4382441258392895,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 15195
+    },
+    {
+      "epoch": 0.15196,
+      "grad_norm": 1.2905753364700843,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 15196
+    },
+    {
+      "epoch": 0.15197,
+      "grad_norm": 1.3322175812610237,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 15197
+    },
+    {
+      "epoch": 0.15198,
+      "grad_norm": 1.2828552826700481,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 15198
+    },
+    {
+      "epoch": 0.15199,
+      "grad_norm": 1.1331332190523502,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 15199
+    },
+    {
+      "epoch": 0.152,
+      "grad_norm": 1.2949163051420256,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 15200
+    },
+    {
+      "epoch": 0.15201,
+      "grad_norm": 1.2667175452466484,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 15201
+    },
+    {
+      "epoch": 0.15202,
+      "grad_norm": 0.9320838745996926,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 15202
+    },
+    {
+      "epoch": 0.15203,
+      "grad_norm": 1.0453995384359727,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 15203
+    },
+    {
+      "epoch": 0.15204,
+      "grad_norm": 1.2597649012105097,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 15204
+    },
+    {
+      "epoch": 0.15205,
+      "grad_norm": 1.1961925801405553,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 15205
+    },
+    {
+      "epoch": 0.15206,
+      "grad_norm": 1.181160444706395,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 15206
+    },
+    {
+      "epoch": 0.15207,
+      "grad_norm": 1.4697980303009301,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 15207
+    },
+    {
+      "epoch": 0.15208,
+      "grad_norm": 1.0050373769088243,
+      "learning_rate": 0.003,
+      "loss": 3.9765,
+      "step": 15208
+    },
+    {
+      "epoch": 0.15209,
+      "grad_norm": 1.4211911658134273,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 15209
+    },
+    {
+      "epoch": 0.1521,
+      "grad_norm": 1.0497449525528875,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 15210
+    },
+    {
+      "epoch": 0.15211,
+      "grad_norm": 1.3400095009725441,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 15211
+    },
+    {
+      "epoch": 0.15212,
+      "grad_norm": 1.035172309022198,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 15212
+    },
+    {
+      "epoch": 0.15213,
+      "grad_norm": 1.2720197272996543,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 15213
+    },
+    {
+      "epoch": 0.15214,
+      "grad_norm": 1.1914354855934521,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 15214
+    },
+    {
+      "epoch": 0.15215,
+      "grad_norm": 1.4567297823795853,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 15215
+    },
+    {
+      "epoch": 0.15216,
+      "grad_norm": 1.1699730057421303,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 15216
+    },
+    {
+      "epoch": 0.15217,
+      "grad_norm": 1.4255275150565567,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 15217
+    },
+    {
+      "epoch": 0.15218,
+      "grad_norm": 1.3457622723850626,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 15218
+    },
+    {
+      "epoch": 0.15219,
+      "grad_norm": 1.259914794142906,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 15219
+    },
+    {
+      "epoch": 0.1522,
+      "grad_norm": 1.3385804625484585,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 15220
+    },
+    {
+      "epoch": 0.15221,
+      "grad_norm": 1.1452926856393928,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 15221
+    },
+    {
+      "epoch": 0.15222,
+      "grad_norm": 1.2199673305085796,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 15222
+    },
+    {
+      "epoch": 0.15223,
+      "grad_norm": 1.1614406086533022,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 15223
+    },
+    {
+      "epoch": 0.15224,
+      "grad_norm": 1.2578319969789138,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 15224
+    },
+    {
+      "epoch": 0.15225,
+      "grad_norm": 1.0908275282131534,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 15225
+    },
+    {
+      "epoch": 0.15226,
+      "grad_norm": 1.436842674134083,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 15226
+    },
+    {
+      "epoch": 0.15227,
+      "grad_norm": 1.1763822273200253,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 15227
+    },
+    {
+      "epoch": 0.15228,
+      "grad_norm": 1.236367653955839,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 15228
+    },
+    {
+      "epoch": 0.15229,
+      "grad_norm": 1.1369189614223947,
+      "learning_rate": 0.003,
+      "loss": 3.9741,
+      "step": 15229
+    },
+    {
+      "epoch": 0.1523,
+      "grad_norm": 1.3009550318043999,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 15230
+    },
+    {
+      "epoch": 0.15231,
+      "grad_norm": 1.0469260377035405,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 15231
+    },
+    {
+      "epoch": 0.15232,
+      "grad_norm": 1.316792046305704,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 15232
+    },
+    {
+      "epoch": 0.15233,
+      "grad_norm": 1.075327317499854,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 15233
+    },
+    {
+      "epoch": 0.15234,
+      "grad_norm": 1.5401713771580106,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 15234
+    },
+    {
+      "epoch": 0.15235,
+      "grad_norm": 1.3384975214423407,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 15235
+    },
+    {
+      "epoch": 0.15236,
+      "grad_norm": 1.2302142023888756,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 15236
+    },
+    {
+      "epoch": 0.15237,
+      "grad_norm": 1.1467050032159485,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 15237
+    },
+    {
+      "epoch": 0.15238,
+      "grad_norm": 1.2608892431989909,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 15238
+    },
+    {
+      "epoch": 0.15239,
+      "grad_norm": 1.1340177552695188,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 15239
+    },
+    {
+      "epoch": 0.1524,
+      "grad_norm": 1.23521863864813,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 15240
+    },
+    {
+      "epoch": 0.15241,
+      "grad_norm": 1.0968342906944502,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 15241
+    },
+    {
+      "epoch": 0.15242,
+      "grad_norm": 1.4950372693011262,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 15242
+    },
+    {
+      "epoch": 0.15243,
+      "grad_norm": 1.1002275896772342,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 15243
+    },
+    {
+      "epoch": 0.15244,
+      "grad_norm": 1.3100180086504831,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 15244
+    },
+    {
+      "epoch": 0.15245,
+      "grad_norm": 1.0125840553200571,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 15245
+    },
+    {
+      "epoch": 0.15246,
+      "grad_norm": 1.6922019399605537,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 15246
+    },
+    {
+      "epoch": 0.15247,
+      "grad_norm": 1.0537686111932896,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 15247
+    },
+    {
+      "epoch": 0.15248,
+      "grad_norm": 1.2479280777607784,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 15248
+    },
+    {
+      "epoch": 0.15249,
+      "grad_norm": 1.2439706911893615,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 15249
+    },
+    {
+      "epoch": 0.1525,
+      "grad_norm": 1.2236146810031685,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 15250
+    },
+    {
+      "epoch": 0.15251,
+      "grad_norm": 1.3381460755089774,
+      "learning_rate": 0.003,
+      "loss": 4.0525,
+      "step": 15251
+    },
+    {
+      "epoch": 0.15252,
+      "grad_norm": 1.2956721814013823,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 15252
+    },
+    {
+      "epoch": 0.15253,
+      "grad_norm": 1.1136046665572314,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 15253
+    },
+    {
+      "epoch": 0.15254,
+      "grad_norm": 1.4519019123615422,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 15254
+    },
+    {
+      "epoch": 0.15255,
+      "grad_norm": 1.218519913711217,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 15255
+    },
+    {
+      "epoch": 0.15256,
+      "grad_norm": 1.288323294027511,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 15256
+    },
+    {
+      "epoch": 0.15257,
+      "grad_norm": 1.0673560985444304,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 15257
+    },
+    {
+      "epoch": 0.15258,
+      "grad_norm": 1.4258252725808966,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 15258
+    },
+    {
+      "epoch": 0.15259,
+      "grad_norm": 1.1040006998672398,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 15259
+    },
+    {
+      "epoch": 0.1526,
+      "grad_norm": 1.2309129950569844,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 15260
+    },
+    {
+      "epoch": 0.15261,
+      "grad_norm": 1.1490315177687394,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 15261
+    },
+    {
+      "epoch": 0.15262,
+      "grad_norm": 1.140301200996627,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 15262
+    },
+    {
+      "epoch": 0.15263,
+      "grad_norm": 1.288034047638439,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 15263
+    },
+    {
+      "epoch": 0.15264,
+      "grad_norm": 1.0811920833381112,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 15264
+    },
+    {
+      "epoch": 0.15265,
+      "grad_norm": 1.249639988258038,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 15265
+    },
+    {
+      "epoch": 0.15266,
+      "grad_norm": 1.1603153694948989,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 15266
+    },
+    {
+      "epoch": 0.15267,
+      "grad_norm": 1.3394422598380689,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 15267
+    },
+    {
+      "epoch": 0.15268,
+      "grad_norm": 1.0742268220328304,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 15268
+    },
+    {
+      "epoch": 0.15269,
+      "grad_norm": 1.3683478964654803,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 15269
+    },
+    {
+      "epoch": 0.1527,
+      "grad_norm": 1.2695653022628564,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 15270
+    },
+    {
+      "epoch": 0.15271,
+      "grad_norm": 1.1547539480855118,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 15271
+    },
+    {
+      "epoch": 0.15272,
+      "grad_norm": 1.1942241027035867,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 15272
+    },
+    {
+      "epoch": 0.15273,
+      "grad_norm": 1.0934806942778186,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 15273
+    },
+    {
+      "epoch": 0.15274,
+      "grad_norm": 1.2736523352348548,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 15274
+    },
+    {
+      "epoch": 0.15275,
+      "grad_norm": 1.2316668759346256,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 15275
+    },
+    {
+      "epoch": 0.15276,
+      "grad_norm": 0.8866068166018798,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 15276
+    },
+    {
+      "epoch": 0.15277,
+      "grad_norm": 1.0359599314182446,
+      "learning_rate": 0.003,
+      "loss": 3.9741,
+      "step": 15277
+    },
+    {
+      "epoch": 0.15278,
+      "grad_norm": 1.4088391751793254,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 15278
+    },
+    {
+      "epoch": 0.15279,
+      "grad_norm": 1.1532474189545543,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 15279
+    },
+    {
+      "epoch": 0.1528,
+      "grad_norm": 1.5209095317461196,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 15280
+    },
+    {
+      "epoch": 0.15281,
+      "grad_norm": 0.9678818222371608,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 15281
+    },
+    {
+      "epoch": 0.15282,
+      "grad_norm": 1.3475866215440784,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 15282
+    },
+    {
+      "epoch": 0.15283,
+      "grad_norm": 1.0007327077437924,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 15283
+    },
+    {
+      "epoch": 0.15284,
+      "grad_norm": 1.5299823759901408,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 15284
+    },
+    {
+      "epoch": 0.15285,
+      "grad_norm": 1.115236900005875,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 15285
+    },
+    {
+      "epoch": 0.15286,
+      "grad_norm": 1.222239233298442,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 15286
+    },
+    {
+      "epoch": 0.15287,
+      "grad_norm": 1.105263151096791,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 15287
+    },
+    {
+      "epoch": 0.15288,
+      "grad_norm": 1.1716505993680435,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 15288
+    },
+    {
+      "epoch": 0.15289,
+      "grad_norm": 1.3563443429362065,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 15289
+    },
+    {
+      "epoch": 0.1529,
+      "grad_norm": 1.0872489410089983,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 15290
+    },
+    {
+      "epoch": 0.15291,
+      "grad_norm": 1.644791899750319,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 15291
+    },
+    {
+      "epoch": 0.15292,
+      "grad_norm": 1.006959413935244,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 15292
+    },
+    {
+      "epoch": 0.15293,
+      "grad_norm": 1.598524079754938,
+      "learning_rate": 0.003,
+      "loss": 4.0449,
+      "step": 15293
+    },
+    {
+      "epoch": 0.15294,
+      "grad_norm": 1.0940183399275392,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 15294
+    },
+    {
+      "epoch": 0.15295,
+      "grad_norm": 1.460337046890657,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 15295
+    },
+    {
+      "epoch": 0.15296,
+      "grad_norm": 1.045725212899953,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 15296
+    },
+    {
+      "epoch": 0.15297,
+      "grad_norm": 1.5802895346212449,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 15297
+    },
+    {
+      "epoch": 0.15298,
+      "grad_norm": 1.3837242228352484,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 15298
+    },
+    {
+      "epoch": 0.15299,
+      "grad_norm": 1.2728385968327829,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 15299
+    },
+    {
+      "epoch": 0.153,
+      "grad_norm": 1.1823117384581505,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 15300
+    },
+    {
+      "epoch": 0.15301,
+      "grad_norm": 1.1320250057435113,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 15301
+    },
+    {
+      "epoch": 0.15302,
+      "grad_norm": 1.4255218577322437,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 15302
+    },
+    {
+      "epoch": 0.15303,
+      "grad_norm": 1.1908400209785568,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 15303
+    },
+    {
+      "epoch": 0.15304,
+      "grad_norm": 1.3528746438276917,
+      "learning_rate": 0.003,
+      "loss": 4.0476,
+      "step": 15304
+    },
+    {
+      "epoch": 0.15305,
+      "grad_norm": 1.1938813749340769,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 15305
+    },
+    {
+      "epoch": 0.15306,
+      "grad_norm": 1.6421325596095928,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 15306
+    },
+    {
+      "epoch": 0.15307,
+      "grad_norm": 0.951216559987826,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 15307
+    },
+    {
+      "epoch": 0.15308,
+      "grad_norm": 1.2691738035815379,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 15308
+    },
+    {
+      "epoch": 0.15309,
+      "grad_norm": 0.9630179201647759,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 15309
+    },
+    {
+      "epoch": 0.1531,
+      "grad_norm": 1.3626385785070245,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 15310
+    },
+    {
+      "epoch": 0.15311,
+      "grad_norm": 1.0798825353592207,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 15311
+    },
+    {
+      "epoch": 0.15312,
+      "grad_norm": 1.2748408005075207,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 15312
+    },
+    {
+      "epoch": 0.15313,
+      "grad_norm": 1.3160143379370914,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 15313
+    },
+    {
+      "epoch": 0.15314,
+      "grad_norm": 1.3381560563598656,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 15314
+    },
+    {
+      "epoch": 0.15315,
+      "grad_norm": 1.1003058742942267,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 15315
+    },
+    {
+      "epoch": 0.15316,
+      "grad_norm": 1.28462379604353,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 15316
+    },
+    {
+      "epoch": 0.15317,
+      "grad_norm": 1.0558852666618743,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 15317
+    },
+    {
+      "epoch": 0.15318,
+      "grad_norm": 1.1925633714135881,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 15318
+    },
+    {
+      "epoch": 0.15319,
+      "grad_norm": 1.1516827476554599,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 15319
+    },
+    {
+      "epoch": 0.1532,
+      "grad_norm": 1.1443399520062345,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 15320
+    },
+    {
+      "epoch": 0.15321,
+      "grad_norm": 1.2249706571549668,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 15321
+    },
+    {
+      "epoch": 0.15322,
+      "grad_norm": 1.22913366138089,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 15322
+    },
+    {
+      "epoch": 0.15323,
+      "grad_norm": 1.3641024561312933,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 15323
+    },
+    {
+      "epoch": 0.15324,
+      "grad_norm": 1.1013868436436511,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 15324
+    },
+    {
+      "epoch": 0.15325,
+      "grad_norm": 1.3058725139288032,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 15325
+    },
+    {
+      "epoch": 0.15326,
+      "grad_norm": 1.2659469483828032,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 15326
+    },
+    {
+      "epoch": 0.15327,
+      "grad_norm": 1.261903749150775,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 15327
+    },
+    {
+      "epoch": 0.15328,
+      "grad_norm": 1.0976890628915055,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 15328
+    },
+    {
+      "epoch": 0.15329,
+      "grad_norm": 1.3073593212321624,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 15329
+    },
+    {
+      "epoch": 0.1533,
+      "grad_norm": 1.2307470804948377,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 15330
+    },
+    {
+      "epoch": 0.15331,
+      "grad_norm": 1.1548746627152964,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 15331
+    },
+    {
+      "epoch": 0.15332,
+      "grad_norm": 1.1745992236943885,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 15332
+    },
+    {
+      "epoch": 0.15333,
+      "grad_norm": 1.29812710663051,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 15333
+    },
+    {
+      "epoch": 0.15334,
+      "grad_norm": 0.9390035244787395,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 15334
+    },
+    {
+      "epoch": 0.15335,
+      "grad_norm": 1.122648019952921,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 15335
+    },
+    {
+      "epoch": 0.15336,
+      "grad_norm": 1.5149705395129167,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 15336
+    },
+    {
+      "epoch": 0.15337,
+      "grad_norm": 1.0013137038070798,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 15337
+    },
+    {
+      "epoch": 0.15338,
+      "grad_norm": 1.299481327762594,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 15338
+    },
+    {
+      "epoch": 0.15339,
+      "grad_norm": 1.4471932995791132,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 15339
+    },
+    {
+      "epoch": 0.1534,
+      "grad_norm": 0.9648166180616551,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 15340
+    },
+    {
+      "epoch": 0.15341,
+      "grad_norm": 1.4509801117246968,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 15341
+    },
+    {
+      "epoch": 0.15342,
+      "grad_norm": 1.1254236266710087,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 15342
+    },
+    {
+      "epoch": 0.15343,
+      "grad_norm": 1.3692642680880276,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 15343
+    },
+    {
+      "epoch": 0.15344,
+      "grad_norm": 1.2619386001461093,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 15344
+    },
+    {
+      "epoch": 0.15345,
+      "grad_norm": 1.2290685149803946,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 15345
+    },
+    {
+      "epoch": 0.15346,
+      "grad_norm": 1.1694162094841682,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 15346
+    },
+    {
+      "epoch": 0.15347,
+      "grad_norm": 1.224932391008914,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 15347
+    },
+    {
+      "epoch": 0.15348,
+      "grad_norm": 1.2410636760246108,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 15348
+    },
+    {
+      "epoch": 0.15349,
+      "grad_norm": 1.118028257618968,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 15349
+    },
+    {
+      "epoch": 0.1535,
+      "grad_norm": 1.2307429212771872,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 15350
+    },
+    {
+      "epoch": 0.15351,
+      "grad_norm": 1.220610134666719,
+      "learning_rate": 0.003,
+      "loss": 3.9772,
+      "step": 15351
+    },
+    {
+      "epoch": 0.15352,
+      "grad_norm": 1.489724020189981,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 15352
+    },
+    {
+      "epoch": 0.15353,
+      "grad_norm": 1.1392495632135529,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 15353
+    },
+    {
+      "epoch": 0.15354,
+      "grad_norm": 1.3664724518429507,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 15354
+    },
+    {
+      "epoch": 0.15355,
+      "grad_norm": 1.0740409634784727,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 15355
+    },
+    {
+      "epoch": 0.15356,
+      "grad_norm": 1.2778573411778194,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 15356
+    },
+    {
+      "epoch": 0.15357,
+      "grad_norm": 1.506825805600986,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 15357
+    },
+    {
+      "epoch": 0.15358,
+      "grad_norm": 1.0304333936951433,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 15358
+    },
+    {
+      "epoch": 0.15359,
+      "grad_norm": 1.4557553962981578,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 15359
+    },
+    {
+      "epoch": 0.1536,
+      "grad_norm": 0.9998670788123999,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 15360
+    },
+    {
+      "epoch": 0.15361,
+      "grad_norm": 1.3285068607989634,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 15361
+    },
+    {
+      "epoch": 0.15362,
+      "grad_norm": 1.1524236129558298,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 15362
+    },
+    {
+      "epoch": 0.15363,
+      "grad_norm": 1.3631793589844639,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 15363
+    },
+    {
+      "epoch": 0.15364,
+      "grad_norm": 1.3841052064121926,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 15364
+    },
+    {
+      "epoch": 0.15365,
+      "grad_norm": 1.148427490969218,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 15365
+    },
+    {
+      "epoch": 0.15366,
+      "grad_norm": 1.3036553111173732,
+      "learning_rate": 0.003,
+      "loss": 4.0475,
+      "step": 15366
+    },
+    {
+      "epoch": 0.15367,
+      "grad_norm": 1.0642389435975332,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 15367
+    },
+    {
+      "epoch": 0.15368,
+      "grad_norm": 1.4958489290963009,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 15368
+    },
+    {
+      "epoch": 0.15369,
+      "grad_norm": 0.9916113308781349,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 15369
+    },
+    {
+      "epoch": 0.1537,
+      "grad_norm": 1.521643615228874,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 15370
+    },
+    {
+      "epoch": 0.15371,
+      "grad_norm": 1.3960310353379668,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 15371
+    },
+    {
+      "epoch": 0.15372,
+      "grad_norm": 1.1601899119026602,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 15372
+    },
+    {
+      "epoch": 0.15373,
+      "grad_norm": 1.1807936206065663,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 15373
+    },
+    {
+      "epoch": 0.15374,
+      "grad_norm": 1.127100500721332,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 15374
+    },
+    {
+      "epoch": 0.15375,
+      "grad_norm": 1.031832545244089,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 15375
+    },
+    {
+      "epoch": 0.15376,
+      "grad_norm": 1.4215603656354039,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 15376
+    },
+    {
+      "epoch": 0.15377,
+      "grad_norm": 1.0606115114134587,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 15377
+    },
+    {
+      "epoch": 0.15378,
+      "grad_norm": 1.3744579165162751,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 15378
+    },
+    {
+      "epoch": 0.15379,
+      "grad_norm": 1.2139090258538139,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 15379
+    },
+    {
+      "epoch": 0.1538,
+      "grad_norm": 1.2386901117854758,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 15380
+    },
+    {
+      "epoch": 0.15381,
+      "grad_norm": 1.147397648133237,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 15381
+    },
+    {
+      "epoch": 0.15382,
+      "grad_norm": 1.332374579095073,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 15382
+    },
+    {
+      "epoch": 0.15383,
+      "grad_norm": 1.0059314793160214,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 15383
+    },
+    {
+      "epoch": 0.15384,
+      "grad_norm": 1.2971956854525335,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 15384
+    },
+    {
+      "epoch": 0.15385,
+      "grad_norm": 1.1863692569687379,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 15385
+    },
+    {
+      "epoch": 0.15386,
+      "grad_norm": 1.147517167221806,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 15386
+    },
+    {
+      "epoch": 0.15387,
+      "grad_norm": 1.0845869428548975,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 15387
+    },
+    {
+      "epoch": 0.15388,
+      "grad_norm": 1.4415714589859954,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 15388
+    },
+    {
+      "epoch": 0.15389,
+      "grad_norm": 1.01200996419366,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 15389
+    },
+    {
+      "epoch": 0.1539,
+      "grad_norm": 1.1968289569868962,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 15390
+    },
+    {
+      "epoch": 0.15391,
+      "grad_norm": 1.4124652006603855,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 15391
+    },
+    {
+      "epoch": 0.15392,
+      "grad_norm": 1.1429694971029492,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 15392
+    },
+    {
+      "epoch": 0.15393,
+      "grad_norm": 1.3989460206903803,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 15393
+    },
+    {
+      "epoch": 0.15394,
+      "grad_norm": 1.1275817712338951,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 15394
+    },
+    {
+      "epoch": 0.15395,
+      "grad_norm": 1.6527313295290693,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 15395
+    },
+    {
+      "epoch": 0.15396,
+      "grad_norm": 1.1606484889693214,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 15396
+    },
+    {
+      "epoch": 0.15397,
+      "grad_norm": 1.3063989661917172,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 15397
+    },
+    {
+      "epoch": 0.15398,
+      "grad_norm": 1.1175556092640544,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 15398
+    },
+    {
+      "epoch": 0.15399,
+      "grad_norm": 1.2611769514814661,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 15399
+    },
+    {
+      "epoch": 0.154,
+      "grad_norm": 1.0227787885978328,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 15400
+    },
+    {
+      "epoch": 0.15401,
+      "grad_norm": 1.175010936201811,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 15401
+    },
+    {
+      "epoch": 0.15402,
+      "grad_norm": 1.301898398708719,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 15402
+    },
+    {
+      "epoch": 0.15403,
+      "grad_norm": 1.0108000392959706,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 15403
+    },
+    {
+      "epoch": 0.15404,
+      "grad_norm": 1.3416860945801337,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 15404
+    },
+    {
+      "epoch": 0.15405,
+      "grad_norm": 1.1570945452891142,
+      "learning_rate": 0.003,
+      "loss": 4.0544,
+      "step": 15405
+    },
+    {
+      "epoch": 0.15406,
+      "grad_norm": 1.3655121369212266,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 15406
+    },
+    {
+      "epoch": 0.15407,
+      "grad_norm": 1.3960011028879589,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 15407
+    },
+    {
+      "epoch": 0.15408,
+      "grad_norm": 1.141425990161296,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 15408
+    },
+    {
+      "epoch": 0.15409,
+      "grad_norm": 1.3061620835789476,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 15409
+    },
+    {
+      "epoch": 0.1541,
+      "grad_norm": 0.9288136755410322,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 15410
+    },
+    {
+      "epoch": 0.15411,
+      "grad_norm": 1.2780898944597026,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 15411
+    },
+    {
+      "epoch": 0.15412,
+      "grad_norm": 1.1165309242772365,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 15412
+    },
+    {
+      "epoch": 0.15413,
+      "grad_norm": 1.2588491423612371,
+      "learning_rate": 0.003,
+      "loss": 4.0502,
+      "step": 15413
+    },
+    {
+      "epoch": 0.15414,
+      "grad_norm": 1.153293339632718,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 15414
+    },
+    {
+      "epoch": 0.15415,
+      "grad_norm": 1.2763101314205272,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 15415
+    },
+    {
+      "epoch": 0.15416,
+      "grad_norm": 1.445718992177873,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 15416
+    },
+    {
+      "epoch": 0.15417,
+      "grad_norm": 1.1010078858794556,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 15417
+    },
+    {
+      "epoch": 0.15418,
+      "grad_norm": 1.3389978908349613,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 15418
+    },
+    {
+      "epoch": 0.15419,
+      "grad_norm": 1.1161796140807403,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 15419
+    },
+    {
+      "epoch": 0.1542,
+      "grad_norm": 1.2183616590008166,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 15420
+    },
+    {
+      "epoch": 0.15421,
+      "grad_norm": 1.0704559979666672,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 15421
+    },
+    {
+      "epoch": 0.15422,
+      "grad_norm": 1.0742924028397265,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 15422
+    },
+    {
+      "epoch": 0.15423,
+      "grad_norm": 1.2111405828356576,
+      "learning_rate": 0.003,
+      "loss": 3.9702,
+      "step": 15423
+    },
+    {
+      "epoch": 0.15424,
+      "grad_norm": 1.0235465294837345,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 15424
+    },
+    {
+      "epoch": 0.15425,
+      "grad_norm": 1.416013184766158,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 15425
+    },
+    {
+      "epoch": 0.15426,
+      "grad_norm": 1.0329446806219278,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 15426
+    },
+    {
+      "epoch": 0.15427,
+      "grad_norm": 1.3111226902340347,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 15427
+    },
+    {
+      "epoch": 0.15428,
+      "grad_norm": 1.330497005134314,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 15428
+    },
+    {
+      "epoch": 0.15429,
+      "grad_norm": 1.403787119578501,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 15429
+    },
+    {
+      "epoch": 0.1543,
+      "grad_norm": 1.0628919762318099,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 15430
+    },
+    {
+      "epoch": 0.15431,
+      "grad_norm": 1.4918675346184322,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 15431
+    },
+    {
+      "epoch": 0.15432,
+      "grad_norm": 1.0839100804515,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 15432
+    },
+    {
+      "epoch": 0.15433,
+      "grad_norm": 1.3741229327105866,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 15433
+    },
+    {
+      "epoch": 0.15434,
+      "grad_norm": 1.014335655092048,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 15434
+    },
+    {
+      "epoch": 0.15435,
+      "grad_norm": 1.5198132343959803,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 15435
+    },
+    {
+      "epoch": 0.15436,
+      "grad_norm": 1.2232189794341666,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 15436
+    },
+    {
+      "epoch": 0.15437,
+      "grad_norm": 1.1922898524941339,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 15437
+    },
+    {
+      "epoch": 0.15438,
+      "grad_norm": 1.4312060880257944,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 15438
+    },
+    {
+      "epoch": 0.15439,
+      "grad_norm": 1.3761522420073637,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 15439
+    },
+    {
+      "epoch": 0.1544,
+      "grad_norm": 1.1878663856256735,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 15440
+    },
+    {
+      "epoch": 0.15441,
+      "grad_norm": 1.409308011730964,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 15441
+    },
+    {
+      "epoch": 0.15442,
+      "grad_norm": 0.8818847587507577,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 15442
+    },
+    {
+      "epoch": 0.15443,
+      "grad_norm": 1.181172095811864,
+      "learning_rate": 0.003,
+      "loss": 3.9859,
+      "step": 15443
+    },
+    {
+      "epoch": 0.15444,
+      "grad_norm": 1.1697211130303764,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 15444
+    },
+    {
+      "epoch": 0.15445,
+      "grad_norm": 1.0309657250127224,
+      "learning_rate": 0.003,
+      "loss": 3.9706,
+      "step": 15445
+    },
+    {
+      "epoch": 0.15446,
+      "grad_norm": 1.3297375349823675,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 15446
+    },
+    {
+      "epoch": 0.15447,
+      "grad_norm": 1.1139099407835233,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 15447
+    },
+    {
+      "epoch": 0.15448,
+      "grad_norm": 1.237655786195009,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 15448
+    },
+    {
+      "epoch": 0.15449,
+      "grad_norm": 1.2789726132717136,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 15449
+    },
+    {
+      "epoch": 0.1545,
+      "grad_norm": 1.2968964530353215,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 15450
+    },
+    {
+      "epoch": 0.15451,
+      "grad_norm": 1.1922491983748065,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 15451
+    },
+    {
+      "epoch": 0.15452,
+      "grad_norm": 1.399508785372692,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 15452
+    },
+    {
+      "epoch": 0.15453,
+      "grad_norm": 0.9931795257761599,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 15453
+    },
+    {
+      "epoch": 0.15454,
+      "grad_norm": 1.343499007254131,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 15454
+    },
+    {
+      "epoch": 0.15455,
+      "grad_norm": 1.0748854282639977,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 15455
+    },
+    {
+      "epoch": 0.15456,
+      "grad_norm": 1.4505517385476958,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 15456
+    },
+    {
+      "epoch": 0.15457,
+      "grad_norm": 1.1089755331302413,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 15457
+    },
+    {
+      "epoch": 0.15458,
+      "grad_norm": 1.4647167086584743,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 15458
+    },
+    {
+      "epoch": 0.15459,
+      "grad_norm": 1.0699807413746714,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 15459
+    },
+    {
+      "epoch": 0.1546,
+      "grad_norm": 1.425807860008332,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 15460
+    },
+    {
+      "epoch": 0.15461,
+      "grad_norm": 1.0401575014221385,
+      "learning_rate": 0.003,
+      "loss": 3.9854,
+      "step": 15461
+    },
+    {
+      "epoch": 0.15462,
+      "grad_norm": 1.3587624825650655,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 15462
+    },
+    {
+      "epoch": 0.15463,
+      "grad_norm": 1.1371008023815934,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 15463
+    },
+    {
+      "epoch": 0.15464,
+      "grad_norm": 1.2999090728439768,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 15464
+    },
+    {
+      "epoch": 0.15465,
+      "grad_norm": 1.2658049390348993,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 15465
+    },
+    {
+      "epoch": 0.15466,
+      "grad_norm": 1.1640506691228396,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 15466
+    },
+    {
+      "epoch": 0.15467,
+      "grad_norm": 1.2667810282519953,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 15467
+    },
+    {
+      "epoch": 0.15468,
+      "grad_norm": 1.0106880311117121,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 15468
+    },
+    {
+      "epoch": 0.15469,
+      "grad_norm": 1.1254762579400168,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 15469
+    },
+    {
+      "epoch": 0.1547,
+      "grad_norm": 1.3597427881980186,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 15470
+    },
+    {
+      "epoch": 0.15471,
+      "grad_norm": 1.0451997736704248,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 15471
+    },
+    {
+      "epoch": 0.15472,
+      "grad_norm": 1.4638421033676579,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 15472
+    },
+    {
+      "epoch": 0.15473,
+      "grad_norm": 0.9818904122801336,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 15473
+    },
+    {
+      "epoch": 0.15474,
+      "grad_norm": 1.3113123003195055,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 15474
+    },
+    {
+      "epoch": 0.15475,
+      "grad_norm": 1.0219220699211995,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 15475
+    },
+    {
+      "epoch": 0.15476,
+      "grad_norm": 1.3721315900566593,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 15476
+    },
+    {
+      "epoch": 0.15477,
+      "grad_norm": 1.0382470290868924,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 15477
+    },
+    {
+      "epoch": 0.15478,
+      "grad_norm": 1.391397070429852,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 15478
+    },
+    {
+      "epoch": 0.15479,
+      "grad_norm": 1.4210267801203937,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 15479
+    },
+    {
+      "epoch": 0.1548,
+      "grad_norm": 1.1612045895435392,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 15480
+    },
+    {
+      "epoch": 0.15481,
+      "grad_norm": 1.0937338117526525,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 15481
+    },
+    {
+      "epoch": 0.15482,
+      "grad_norm": 1.3604106267692817,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 15482
+    },
+    {
+      "epoch": 0.15483,
+      "grad_norm": 1.1859626833742631,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 15483
+    },
+    {
+      "epoch": 0.15484,
+      "grad_norm": 1.201582766515893,
+      "learning_rate": 0.003,
+      "loss": 4.0507,
+      "step": 15484
+    },
+    {
+      "epoch": 0.15485,
+      "grad_norm": 1.3486346050306894,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 15485
+    },
+    {
+      "epoch": 0.15486,
+      "grad_norm": 1.0764828931379589,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 15486
+    },
+    {
+      "epoch": 0.15487,
+      "grad_norm": 1.3408549143673125,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 15487
+    },
+    {
+      "epoch": 0.15488,
+      "grad_norm": 1.1300827202543724,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 15488
+    },
+    {
+      "epoch": 0.15489,
+      "grad_norm": 1.1709510441950886,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 15489
+    },
+    {
+      "epoch": 0.1549,
+      "grad_norm": 1.249822152716845,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 15490
+    },
+    {
+      "epoch": 0.15491,
+      "grad_norm": 1.291335979959531,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 15491
+    },
+    {
+      "epoch": 0.15492,
+      "grad_norm": 1.3396259417898158,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 15492
+    },
+    {
+      "epoch": 0.15493,
+      "grad_norm": 1.2814611940312006,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 15493
+    },
+    {
+      "epoch": 0.15494,
+      "grad_norm": 1.1777452240371598,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 15494
+    },
+    {
+      "epoch": 0.15495,
+      "grad_norm": 1.1592037969328344,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 15495
+    },
+    {
+      "epoch": 0.15496,
+      "grad_norm": 1.1044762203900087,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 15496
+    },
+    {
+      "epoch": 0.15497,
+      "grad_norm": 1.2668704762351173,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 15497
+    },
+    {
+      "epoch": 0.15498,
+      "grad_norm": 1.2622274894185679,
+      "learning_rate": 0.003,
+      "loss": 3.9687,
+      "step": 15498
+    },
+    {
+      "epoch": 0.15499,
+      "grad_norm": 1.1551704191426173,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 15499
+    },
+    {
+      "epoch": 0.155,
+      "grad_norm": 1.2922799353230623,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 15500
+    },
+    {
+      "epoch": 0.15501,
+      "grad_norm": 1.2177967791089919,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 15501
+    },
+    {
+      "epoch": 0.15502,
+      "grad_norm": 1.1380041505072185,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 15502
+    },
+    {
+      "epoch": 0.15503,
+      "grad_norm": 1.2326803571856018,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 15503
+    },
+    {
+      "epoch": 0.15504,
+      "grad_norm": 1.0677489422471353,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 15504
+    },
+    {
+      "epoch": 0.15505,
+      "grad_norm": 1.4375907997925543,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 15505
+    },
+    {
+      "epoch": 0.15506,
+      "grad_norm": 0.9940022896813783,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 15506
+    },
+    {
+      "epoch": 0.15507,
+      "grad_norm": 1.8163683369923858,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 15507
+    },
+    {
+      "epoch": 0.15508,
+      "grad_norm": 0.9821455908225213,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 15508
+    },
+    {
+      "epoch": 0.15509,
+      "grad_norm": 1.1321623600415103,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 15509
+    },
+    {
+      "epoch": 0.1551,
+      "grad_norm": 1.4757627649908576,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 15510
+    },
+    {
+      "epoch": 0.15511,
+      "grad_norm": 1.1836268113812825,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 15511
+    },
+    {
+      "epoch": 0.15512,
+      "grad_norm": 1.4550460571051078,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 15512
+    },
+    {
+      "epoch": 0.15513,
+      "grad_norm": 1.0424281490788763,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 15513
+    },
+    {
+      "epoch": 0.15514,
+      "grad_norm": 1.3060251931644995,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 15514
+    },
+    {
+      "epoch": 0.15515,
+      "grad_norm": 1.1840927008821804,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 15515
+    },
+    {
+      "epoch": 0.15516,
+      "grad_norm": 1.3355984599902573,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 15516
+    },
+    {
+      "epoch": 0.15517,
+      "grad_norm": 1.3854475674976467,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 15517
+    },
+    {
+      "epoch": 0.15518,
+      "grad_norm": 1.1903115013555348,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 15518
+    },
+    {
+      "epoch": 0.15519,
+      "grad_norm": 1.3645758618524382,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 15519
+    },
+    {
+      "epoch": 0.1552,
+      "grad_norm": 1.1420658547143592,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 15520
+    },
+    {
+      "epoch": 0.15521,
+      "grad_norm": 1.0788356898283442,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 15521
+    },
+    {
+      "epoch": 0.15522,
+      "grad_norm": 1.2409217539764137,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 15522
+    },
+    {
+      "epoch": 0.15523,
+      "grad_norm": 1.1395473943660315,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 15523
+    },
+    {
+      "epoch": 0.15524,
+      "grad_norm": 1.1662676153801086,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 15524
+    },
+    {
+      "epoch": 0.15525,
+      "grad_norm": 1.2325072549210612,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 15525
+    },
+    {
+      "epoch": 0.15526,
+      "grad_norm": 1.1737878425195596,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 15526
+    },
+    {
+      "epoch": 0.15527,
+      "grad_norm": 1.3796725225222926,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 15527
+    },
+    {
+      "epoch": 0.15528,
+      "grad_norm": 1.2538521278635026,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 15528
+    },
+    {
+      "epoch": 0.15529,
+      "grad_norm": 1.1800656715900641,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 15529
+    },
+    {
+      "epoch": 0.1553,
+      "grad_norm": 1.205066071322585,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 15530
+    },
+    {
+      "epoch": 0.15531,
+      "grad_norm": 1.019614767721003,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 15531
+    },
+    {
+      "epoch": 0.15532,
+      "grad_norm": 1.1759045019842371,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 15532
+    },
+    {
+      "epoch": 0.15533,
+      "grad_norm": 1.1854990433424775,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 15533
+    },
+    {
+      "epoch": 0.15534,
+      "grad_norm": 1.196400638478638,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 15534
+    },
+    {
+      "epoch": 0.15535,
+      "grad_norm": 1.5511982731401823,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 15535
+    },
+    {
+      "epoch": 0.15536,
+      "grad_norm": 0.8202768827545235,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 15536
+    },
+    {
+      "epoch": 0.15537,
+      "grad_norm": 1.0404641833258768,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 15537
+    },
+    {
+      "epoch": 0.15538,
+      "grad_norm": 1.3069588417679192,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 15538
+    },
+    {
+      "epoch": 0.15539,
+      "grad_norm": 1.272868134888428,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 15539
+    },
+    {
+      "epoch": 0.1554,
+      "grad_norm": 1.427051787014361,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 15540
+    },
+    {
+      "epoch": 0.15541,
+      "grad_norm": 1.1839923373571692,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 15541
+    },
+    {
+      "epoch": 0.15542,
+      "grad_norm": 1.3716600078861727,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 15542
+    },
+    {
+      "epoch": 0.15543,
+      "grad_norm": 1.0728442089606187,
+      "learning_rate": 0.003,
+      "loss": 3.9863,
+      "step": 15543
+    },
+    {
+      "epoch": 0.15544,
+      "grad_norm": 1.3486558614426765,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 15544
+    },
+    {
+      "epoch": 0.15545,
+      "grad_norm": 1.1889424087910203,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 15545
+    },
+    {
+      "epoch": 0.15546,
+      "grad_norm": 1.4268357117744535,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 15546
+    },
+    {
+      "epoch": 0.15547,
+      "grad_norm": 1.1236587367880921,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 15547
+    },
+    {
+      "epoch": 0.15548,
+      "grad_norm": 1.4448499863917188,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 15548
+    },
+    {
+      "epoch": 0.15549,
+      "grad_norm": 0.9604154047060119,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 15549
+    },
+    {
+      "epoch": 0.1555,
+      "grad_norm": 1.257451362138832,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 15550
+    },
+    {
+      "epoch": 0.15551,
+      "grad_norm": 1.1220385706073506,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 15551
+    },
+    {
+      "epoch": 0.15552,
+      "grad_norm": 1.3838135909506402,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 15552
+    },
+    {
+      "epoch": 0.15553,
+      "grad_norm": 0.9600748836538427,
+      "learning_rate": 0.003,
+      "loss": 3.9742,
+      "step": 15553
+    },
+    {
+      "epoch": 0.15554,
+      "grad_norm": 1.1852568034872384,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 15554
+    },
+    {
+      "epoch": 0.15555,
+      "grad_norm": 1.174714423688401,
+      "learning_rate": 0.003,
+      "loss": 3.9802,
+      "step": 15555
+    },
+    {
+      "epoch": 0.15556,
+      "grad_norm": 1.2123765713461248,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 15556
+    },
+    {
+      "epoch": 0.15557,
+      "grad_norm": 1.1638765547588363,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 15557
+    },
+    {
+      "epoch": 0.15558,
+      "grad_norm": 1.0850237294958363,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 15558
+    },
+    {
+      "epoch": 0.15559,
+      "grad_norm": 1.5111875014671927,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 15559
+    },
+    {
+      "epoch": 0.1556,
+      "grad_norm": 1.4196270330515373,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 15560
+    },
+    {
+      "epoch": 0.15561,
+      "grad_norm": 1.2845578894800977,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 15561
+    },
+    {
+      "epoch": 0.15562,
+      "grad_norm": 1.107438276853264,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 15562
+    },
+    {
+      "epoch": 0.15563,
+      "grad_norm": 0.9621144226586266,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 15563
+    },
+    {
+      "epoch": 0.15564,
+      "grad_norm": 1.2080448978519494,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 15564
+    },
+    {
+      "epoch": 0.15565,
+      "grad_norm": 1.177355515944768,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 15565
+    },
+    {
+      "epoch": 0.15566,
+      "grad_norm": 1.3013818446067609,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 15566
+    },
+    {
+      "epoch": 0.15567,
+      "grad_norm": 1.4693577918031457,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 15567
+    },
+    {
+      "epoch": 0.15568,
+      "grad_norm": 1.1977300101325452,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 15568
+    },
+    {
+      "epoch": 0.15569,
+      "grad_norm": 1.30836741460951,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 15569
+    },
+    {
+      "epoch": 0.1557,
+      "grad_norm": 1.1907578962424745,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 15570
+    },
+    {
+      "epoch": 0.15571,
+      "grad_norm": 1.4503119718230573,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 15571
+    },
+    {
+      "epoch": 0.15572,
+      "grad_norm": 1.278895214753018,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 15572
+    },
+    {
+      "epoch": 0.15573,
+      "grad_norm": 1.0395796991744881,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 15573
+    },
+    {
+      "epoch": 0.15574,
+      "grad_norm": 1.6556038524578698,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 15574
+    },
+    {
+      "epoch": 0.15575,
+      "grad_norm": 1.0835814542160225,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 15575
+    },
+    {
+      "epoch": 0.15576,
+      "grad_norm": 1.5193905404710812,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 15576
+    },
+    {
+      "epoch": 0.15577,
+      "grad_norm": 1.1346197793117552,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 15577
+    },
+    {
+      "epoch": 0.15578,
+      "grad_norm": 1.1313532216410387,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 15578
+    },
+    {
+      "epoch": 0.15579,
+      "grad_norm": 1.4615977378034375,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 15579
+    },
+    {
+      "epoch": 0.1558,
+      "grad_norm": 1.1023308878737557,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 15580
+    },
+    {
+      "epoch": 0.15581,
+      "grad_norm": 1.3832846645080352,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 15581
+    },
+    {
+      "epoch": 0.15582,
+      "grad_norm": 1.1717556196242371,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 15582
+    },
+    {
+      "epoch": 0.15583,
+      "grad_norm": 1.4744072510739992,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 15583
+    },
+    {
+      "epoch": 0.15584,
+      "grad_norm": 1.052391300109064,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 15584
+    },
+    {
+      "epoch": 0.15585,
+      "grad_norm": 1.3762594933567556,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 15585
+    },
+    {
+      "epoch": 0.15586,
+      "grad_norm": 1.2243391222677933,
+      "learning_rate": 0.003,
+      "loss": 3.9805,
+      "step": 15586
+    },
+    {
+      "epoch": 0.15587,
+      "grad_norm": 1.267717794998198,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 15587
+    },
+    {
+      "epoch": 0.15588,
+      "grad_norm": 1.3117570916822947,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 15588
+    },
+    {
+      "epoch": 0.15589,
+      "grad_norm": 1.1515375084571582,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 15589
+    },
+    {
+      "epoch": 0.1559,
+      "grad_norm": 1.2237350512132354,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 15590
+    },
+    {
+      "epoch": 0.15591,
+      "grad_norm": 1.2770771620758707,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 15591
+    },
+    {
+      "epoch": 0.15592,
+      "grad_norm": 1.2947035832215696,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 15592
+    },
+    {
+      "epoch": 0.15593,
+      "grad_norm": 1.131252977144435,
+      "learning_rate": 0.003,
+      "loss": 3.98,
+      "step": 15593
+    },
+    {
+      "epoch": 0.15594,
+      "grad_norm": 1.060910237472633,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 15594
+    },
+    {
+      "epoch": 0.15595,
+      "grad_norm": 1.1386813912532787,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 15595
+    },
+    {
+      "epoch": 0.15596,
+      "grad_norm": 1.038484224163973,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 15596
+    },
+    {
+      "epoch": 0.15597,
+      "grad_norm": 1.3268715049524036,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 15597
+    },
+    {
+      "epoch": 0.15598,
+      "grad_norm": 1.1932150584347168,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 15598
+    },
+    {
+      "epoch": 0.15599,
+      "grad_norm": 1.631553122367338,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 15599
+    },
+    {
+      "epoch": 0.156,
+      "grad_norm": 1.031280774003672,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 15600
+    },
+    {
+      "epoch": 0.15601,
+      "grad_norm": 1.3259177715354908,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 15601
+    },
+    {
+      "epoch": 0.15602,
+      "grad_norm": 1.3552168329577372,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 15602
+    },
+    {
+      "epoch": 0.15603,
+      "grad_norm": 1.05154774472057,
+      "learning_rate": 0.003,
+      "loss": 3.9867,
+      "step": 15603
+    },
+    {
+      "epoch": 0.15604,
+      "grad_norm": 1.4783019636415102,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 15604
+    },
+    {
+      "epoch": 0.15605,
+      "grad_norm": 1.1577328894270635,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 15605
+    },
+    {
+      "epoch": 0.15606,
+      "grad_norm": 1.378054590499981,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 15606
+    },
+    {
+      "epoch": 0.15607,
+      "grad_norm": 1.196110950152625,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 15607
+    },
+    {
+      "epoch": 0.15608,
+      "grad_norm": 1.257660491301664,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 15608
+    },
+    {
+      "epoch": 0.15609,
+      "grad_norm": 1.3290291137316042,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 15609
+    },
+    {
+      "epoch": 0.1561,
+      "grad_norm": 1.0273929162359139,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 15610
+    },
+    {
+      "epoch": 0.15611,
+      "grad_norm": 1.1489898023789653,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 15611
+    },
+    {
+      "epoch": 0.15612,
+      "grad_norm": 1.1284407520548465,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 15612
+    },
+    {
+      "epoch": 0.15613,
+      "grad_norm": 1.362815278658877,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 15613
+    },
+    {
+      "epoch": 0.15614,
+      "grad_norm": 1.0728144070534393,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 15614
+    },
+    {
+      "epoch": 0.15615,
+      "grad_norm": 1.6027777923758262,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 15615
+    },
+    {
+      "epoch": 0.15616,
+      "grad_norm": 1.0754283703159526,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 15616
+    },
+    {
+      "epoch": 0.15617,
+      "grad_norm": 1.526599269818748,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 15617
+    },
+    {
+      "epoch": 0.15618,
+      "grad_norm": 1.0939402000174385,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 15618
+    },
+    {
+      "epoch": 0.15619,
+      "grad_norm": 1.2494915990304731,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 15619
+    },
+    {
+      "epoch": 0.1562,
+      "grad_norm": 1.226717648159094,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 15620
+    },
+    {
+      "epoch": 0.15621,
+      "grad_norm": 1.3602513260753404,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 15621
+    },
+    {
+      "epoch": 0.15622,
+      "grad_norm": 1.1010272012241384,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 15622
+    },
+    {
+      "epoch": 0.15623,
+      "grad_norm": 1.4585470776423317,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 15623
+    },
+    {
+      "epoch": 0.15624,
+      "grad_norm": 1.0050101911556795,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 15624
+    },
+    {
+      "epoch": 0.15625,
+      "grad_norm": 1.2981447508412527,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 15625
+    },
+    {
+      "epoch": 0.15626,
+      "grad_norm": 1.170417358387042,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 15626
+    },
+    {
+      "epoch": 0.15627,
+      "grad_norm": 1.2314174237285826,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 15627
+    },
+    {
+      "epoch": 0.15628,
+      "grad_norm": 1.1524994522301018,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 15628
+    },
+    {
+      "epoch": 0.15629,
+      "grad_norm": 1.318418554553263,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 15629
+    },
+    {
+      "epoch": 0.1563,
+      "grad_norm": 1.3949926386579614,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 15630
+    },
+    {
+      "epoch": 0.15631,
+      "grad_norm": 1.0556249059216092,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 15631
+    },
+    {
+      "epoch": 0.15632,
+      "grad_norm": 1.2179348218164991,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 15632
+    },
+    {
+      "epoch": 0.15633,
+      "grad_norm": 1.3034648865720937,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 15633
+    },
+    {
+      "epoch": 0.15634,
+      "grad_norm": 1.1642828471343658,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 15634
+    },
+    {
+      "epoch": 0.15635,
+      "grad_norm": 1.3870366758727755,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 15635
+    },
+    {
+      "epoch": 0.15636,
+      "grad_norm": 1.162394387820439,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 15636
+    },
+    {
+      "epoch": 0.15637,
+      "grad_norm": 1.1264573153720103,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 15637
+    },
+    {
+      "epoch": 0.15638,
+      "grad_norm": 1.2210700342418637,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 15638
+    },
+    {
+      "epoch": 0.15639,
+      "grad_norm": 1.3816056938231887,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 15639
+    },
+    {
+      "epoch": 0.1564,
+      "grad_norm": 1.1240051895362912,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 15640
+    },
+    {
+      "epoch": 0.15641,
+      "grad_norm": 1.2286781372547828,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 15641
+    },
+    {
+      "epoch": 0.15642,
+      "grad_norm": 1.0770756354352453,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 15642
+    },
+    {
+      "epoch": 0.15643,
+      "grad_norm": 1.1868289717266771,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 15643
+    },
+    {
+      "epoch": 0.15644,
+      "grad_norm": 1.2934548543139381,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 15644
+    },
+    {
+      "epoch": 0.15645,
+      "grad_norm": 1.0885130534618273,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 15645
+    },
+    {
+      "epoch": 0.15646,
+      "grad_norm": 1.4594782536308275,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 15646
+    },
+    {
+      "epoch": 0.15647,
+      "grad_norm": 1.171122790155785,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 15647
+    },
+    {
+      "epoch": 0.15648,
+      "grad_norm": 1.3247598811351102,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 15648
+    },
+    {
+      "epoch": 0.15649,
+      "grad_norm": 1.0884078643809036,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 15649
+    },
+    {
+      "epoch": 0.1565,
+      "grad_norm": 1.2625242738306834,
+      "learning_rate": 0.003,
+      "loss": 3.9649,
+      "step": 15650
+    },
+    {
+      "epoch": 0.15651,
+      "grad_norm": 1.2590093124234436,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 15651
+    },
+    {
+      "epoch": 0.15652,
+      "grad_norm": 1.0386595781942116,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 15652
+    },
+    {
+      "epoch": 0.15653,
+      "grad_norm": 1.4220855535299173,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 15653
+    },
+    {
+      "epoch": 0.15654,
+      "grad_norm": 1.2390563975423328,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 15654
+    },
+    {
+      "epoch": 0.15655,
+      "grad_norm": 1.5156655582788026,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 15655
+    },
+    {
+      "epoch": 0.15656,
+      "grad_norm": 1.0683641273964668,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 15656
+    },
+    {
+      "epoch": 0.15657,
+      "grad_norm": 1.3594544693303718,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 15657
+    },
+    {
+      "epoch": 0.15658,
+      "grad_norm": 1.2731269504661384,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 15658
+    },
+    {
+      "epoch": 0.15659,
+      "grad_norm": 1.2439084811529881,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 15659
+    },
+    {
+      "epoch": 0.1566,
+      "grad_norm": 1.3989181078805446,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 15660
+    },
+    {
+      "epoch": 0.15661,
+      "grad_norm": 1.1266789991409576,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 15661
+    },
+    {
+      "epoch": 0.15662,
+      "grad_norm": 1.25549306388821,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 15662
+    },
+    {
+      "epoch": 0.15663,
+      "grad_norm": 1.2024359146994443,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 15663
+    },
+    {
+      "epoch": 0.15664,
+      "grad_norm": 1.4445860300332145,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 15664
+    },
+    {
+      "epoch": 0.15665,
+      "grad_norm": 1.0638694520822147,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 15665
+    },
+    {
+      "epoch": 0.15666,
+      "grad_norm": 1.35890651645828,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 15666
+    },
+    {
+      "epoch": 0.15667,
+      "grad_norm": 1.1594542945395403,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 15667
+    },
+    {
+      "epoch": 0.15668,
+      "grad_norm": 1.3836444966834545,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 15668
+    },
+    {
+      "epoch": 0.15669,
+      "grad_norm": 1.2809819371574236,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 15669
+    },
+    {
+      "epoch": 0.1567,
+      "grad_norm": 1.1595670146662453,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 15670
+    },
+    {
+      "epoch": 0.15671,
+      "grad_norm": 1.4693245437056486,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 15671
+    },
+    {
+      "epoch": 0.15672,
+      "grad_norm": 0.9537301411473873,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 15672
+    },
+    {
+      "epoch": 0.15673,
+      "grad_norm": 1.2160253322792343,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 15673
+    },
+    {
+      "epoch": 0.15674,
+      "grad_norm": 1.1795293676711307,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 15674
+    },
+    {
+      "epoch": 0.15675,
+      "grad_norm": 1.311684259378266,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 15675
+    },
+    {
+      "epoch": 0.15676,
+      "grad_norm": 1.2582466749306869,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 15676
+    },
+    {
+      "epoch": 0.15677,
+      "grad_norm": 1.3351466316642864,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 15677
+    },
+    {
+      "epoch": 0.15678,
+      "grad_norm": 1.0633834566426348,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 15678
+    },
+    {
+      "epoch": 0.15679,
+      "grad_norm": 1.37332463707254,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 15679
+    },
+    {
+      "epoch": 0.1568,
+      "grad_norm": 1.1712721225087668,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 15680
+    },
+    {
+      "epoch": 0.15681,
+      "grad_norm": 1.3630506657556807,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 15681
+    },
+    {
+      "epoch": 0.15682,
+      "grad_norm": 1.2004379728704972,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 15682
+    },
+    {
+      "epoch": 0.15683,
+      "grad_norm": 1.2342667491771402,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 15683
+    },
+    {
+      "epoch": 0.15684,
+      "grad_norm": 1.1773798120343846,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 15684
+    },
+    {
+      "epoch": 0.15685,
+      "grad_norm": 1.0595683897554626,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 15685
+    },
+    {
+      "epoch": 0.15686,
+      "grad_norm": 1.3224476326633003,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 15686
+    },
+    {
+      "epoch": 0.15687,
+      "grad_norm": 1.191516704767411,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 15687
+    },
+    {
+      "epoch": 0.15688,
+      "grad_norm": 1.3277753161978578,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 15688
+    },
+    {
+      "epoch": 0.15689,
+      "grad_norm": 1.05958924159554,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 15689
+    },
+    {
+      "epoch": 0.1569,
+      "grad_norm": 1.4217155196174134,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 15690
+    },
+    {
+      "epoch": 0.15691,
+      "grad_norm": 1.075912120136982,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 15691
+    },
+    {
+      "epoch": 0.15692,
+      "grad_norm": 1.3995474722503682,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 15692
+    },
+    {
+      "epoch": 0.15693,
+      "grad_norm": 1.0650293846026575,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 15693
+    },
+    {
+      "epoch": 0.15694,
+      "grad_norm": 1.111432765077753,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 15694
+    },
+    {
+      "epoch": 0.15695,
+      "grad_norm": 1.2400496626879633,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 15695
+    },
+    {
+      "epoch": 0.15696,
+      "grad_norm": 1.2820035726545784,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 15696
+    },
+    {
+      "epoch": 0.15697,
+      "grad_norm": 1.144222892582624,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 15697
+    },
+    {
+      "epoch": 0.15698,
+      "grad_norm": 1.2560877918014903,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 15698
+    },
+    {
+      "epoch": 0.15699,
+      "grad_norm": 1.1004322491024552,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 15699
+    },
+    {
+      "epoch": 0.157,
+      "grad_norm": 1.1219445606257716,
+      "learning_rate": 0.003,
+      "loss": 3.9872,
+      "step": 15700
+    },
+    {
+      "epoch": 0.15701,
+      "grad_norm": 1.2534425420426756,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 15701
+    },
+    {
+      "epoch": 0.15702,
+      "grad_norm": 1.0829170645941426,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 15702
+    },
+    {
+      "epoch": 0.15703,
+      "grad_norm": 1.361611697850092,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 15703
+    },
+    {
+      "epoch": 0.15704,
+      "grad_norm": 1.1696394963040575,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 15704
+    },
+    {
+      "epoch": 0.15705,
+      "grad_norm": 1.1887616996703583,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 15705
+    },
+    {
+      "epoch": 0.15706,
+      "grad_norm": 1.2745663354960037,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 15706
+    },
+    {
+      "epoch": 0.15707,
+      "grad_norm": 1.2381550064409779,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 15707
+    },
+    {
+      "epoch": 0.15708,
+      "grad_norm": 1.2739542548057263,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 15708
+    },
+    {
+      "epoch": 0.15709,
+      "grad_norm": 1.2398633286941458,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 15709
+    },
+    {
+      "epoch": 0.1571,
+      "grad_norm": 1.3097628322922201,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 15710
+    },
+    {
+      "epoch": 0.15711,
+      "grad_norm": 1.2036284641889141,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 15711
+    },
+    {
+      "epoch": 0.15712,
+      "grad_norm": 1.1727130198403828,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 15712
+    },
+    {
+      "epoch": 0.15713,
+      "grad_norm": 1.3216356222953638,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 15713
+    },
+    {
+      "epoch": 0.15714,
+      "grad_norm": 1.230233431018454,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 15714
+    },
+    {
+      "epoch": 0.15715,
+      "grad_norm": 1.0479851957765023,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 15715
+    },
+    {
+      "epoch": 0.15716,
+      "grad_norm": 1.510663539638075,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 15716
+    },
+    {
+      "epoch": 0.15717,
+      "grad_norm": 1.003824020129898,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 15717
+    },
+    {
+      "epoch": 0.15718,
+      "grad_norm": 1.6092334246727755,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 15718
+    },
+    {
+      "epoch": 0.15719,
+      "grad_norm": 0.9666745117091241,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 15719
+    },
+    {
+      "epoch": 0.1572,
+      "grad_norm": 1.540739592405409,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 15720
+    },
+    {
+      "epoch": 0.15721,
+      "grad_norm": 1.0197508957957866,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 15721
+    },
+    {
+      "epoch": 0.15722,
+      "grad_norm": 1.4017671085031251,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 15722
+    },
+    {
+      "epoch": 0.15723,
+      "grad_norm": 1.410379383302518,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 15723
+    },
+    {
+      "epoch": 0.15724,
+      "grad_norm": 1.0326787094813932,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 15724
+    },
+    {
+      "epoch": 0.15725,
+      "grad_norm": 1.4714017123876677,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 15725
+    },
+    {
+      "epoch": 0.15726,
+      "grad_norm": 1.1761253766406978,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 15726
+    },
+    {
+      "epoch": 0.15727,
+      "grad_norm": 1.1971053477586984,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 15727
+    },
+    {
+      "epoch": 0.15728,
+      "grad_norm": 1.202301682287537,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 15728
+    },
+    {
+      "epoch": 0.15729,
+      "grad_norm": 1.394061714539972,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 15729
+    },
+    {
+      "epoch": 0.1573,
+      "grad_norm": 1.0839884842598562,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 15730
+    },
+    {
+      "epoch": 0.15731,
+      "grad_norm": 1.3454603881216958,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 15731
+    },
+    {
+      "epoch": 0.15732,
+      "grad_norm": 1.1226471976758234,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 15732
+    },
+    {
+      "epoch": 0.15733,
+      "grad_norm": 1.341650590026742,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 15733
+    },
+    {
+      "epoch": 0.15734,
+      "grad_norm": 1.333745219797826,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 15734
+    },
+    {
+      "epoch": 0.15735,
+      "grad_norm": 1.3820529306632514,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 15735
+    },
+    {
+      "epoch": 0.15736,
+      "grad_norm": 1.075898810263639,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 15736
+    },
+    {
+      "epoch": 0.15737,
+      "grad_norm": 1.1927507040975849,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 15737
+    },
+    {
+      "epoch": 0.15738,
+      "grad_norm": 1.1560093494963364,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 15738
+    },
+    {
+      "epoch": 0.15739,
+      "grad_norm": 1.2765791949268857,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 15739
+    },
+    {
+      "epoch": 0.1574,
+      "grad_norm": 1.2261031464072705,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 15740
+    },
+    {
+      "epoch": 0.15741,
+      "grad_norm": 1.3856210456393485,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 15741
+    },
+    {
+      "epoch": 0.15742,
+      "grad_norm": 1.2308173545958936,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 15742
+    },
+    {
+      "epoch": 0.15743,
+      "grad_norm": 1.305667574962459,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 15743
+    },
+    {
+      "epoch": 0.15744,
+      "grad_norm": 1.122296256777786,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 15744
+    },
+    {
+      "epoch": 0.15745,
+      "grad_norm": 1.4580327525916033,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 15745
+    },
+    {
+      "epoch": 0.15746,
+      "grad_norm": 1.014207682133263,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 15746
+    },
+    {
+      "epoch": 0.15747,
+      "grad_norm": 1.2128222596163458,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 15747
+    },
+    {
+      "epoch": 0.15748,
+      "grad_norm": 1.618226744308094,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 15748
+    },
+    {
+      "epoch": 0.15749,
+      "grad_norm": 1.0926705269460224,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 15749
+    },
+    {
+      "epoch": 0.1575,
+      "grad_norm": 1.3084121480238136,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 15750
+    },
+    {
+      "epoch": 0.15751,
+      "grad_norm": 1.0881441144059336,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 15751
+    },
+    {
+      "epoch": 0.15752,
+      "grad_norm": 1.3828878959169328,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 15752
+    },
+    {
+      "epoch": 0.15753,
+      "grad_norm": 1.0582213292468492,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 15753
+    },
+    {
+      "epoch": 0.15754,
+      "grad_norm": 1.2243364731940443,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 15754
+    },
+    {
+      "epoch": 0.15755,
+      "grad_norm": 0.9489447170472749,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 15755
+    },
+    {
+      "epoch": 0.15756,
+      "grad_norm": 1.3767375660460266,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 15756
+    },
+    {
+      "epoch": 0.15757,
+      "grad_norm": 1.1513102637885906,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 15757
+    },
+    {
+      "epoch": 0.15758,
+      "grad_norm": 1.2342191344271383,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 15758
+    },
+    {
+      "epoch": 0.15759,
+      "grad_norm": 1.3732495854092959,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 15759
+    },
+    {
+      "epoch": 0.1576,
+      "grad_norm": 1.1450681370573774,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 15760
+    },
+    {
+      "epoch": 0.15761,
+      "grad_norm": 1.1808603286300208,
+      "learning_rate": 0.003,
+      "loss": 3.9715,
+      "step": 15761
+    },
+    {
+      "epoch": 0.15762,
+      "grad_norm": 1.3347176288852123,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 15762
+    },
+    {
+      "epoch": 0.15763,
+      "grad_norm": 1.1791546013231482,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 15763
+    },
+    {
+      "epoch": 0.15764,
+      "grad_norm": 1.4660199504829574,
+      "learning_rate": 0.003,
+      "loss": 3.9816,
+      "step": 15764
+    },
+    {
+      "epoch": 0.15765,
+      "grad_norm": 1.1500449240770518,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 15765
+    },
+    {
+      "epoch": 0.15766,
+      "grad_norm": 1.3741568751174897,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 15766
+    },
+    {
+      "epoch": 0.15767,
+      "grad_norm": 1.1140070005830738,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 15767
+    },
+    {
+      "epoch": 0.15768,
+      "grad_norm": 1.257002863470519,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 15768
+    },
+    {
+      "epoch": 0.15769,
+      "grad_norm": 1.2053429049376951,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 15769
+    },
+    {
+      "epoch": 0.1577,
+      "grad_norm": 1.240049321747172,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 15770
+    },
+    {
+      "epoch": 0.15771,
+      "grad_norm": 1.1955814457159923,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 15771
+    },
+    {
+      "epoch": 0.15772,
+      "grad_norm": 1.1340920104271954,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 15772
+    },
+    {
+      "epoch": 0.15773,
+      "grad_norm": 1.430938579515546,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 15773
+    },
+    {
+      "epoch": 0.15774,
+      "grad_norm": 1.1187858894078215,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 15774
+    },
+    {
+      "epoch": 0.15775,
+      "grad_norm": 1.6429846620387065,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 15775
+    },
+    {
+      "epoch": 0.15776,
+      "grad_norm": 0.9314203897015222,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 15776
+    },
+    {
+      "epoch": 0.15777,
+      "grad_norm": 1.3991092016747086,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 15777
+    },
+    {
+      "epoch": 0.15778,
+      "grad_norm": 1.2251718959934537,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 15778
+    },
+    {
+      "epoch": 0.15779,
+      "grad_norm": 1.3636857510201814,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 15779
+    },
+    {
+      "epoch": 0.1578,
+      "grad_norm": 1.185420826916895,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 15780
+    },
+    {
+      "epoch": 0.15781,
+      "grad_norm": 1.1484718610891285,
+      "learning_rate": 0.003,
+      "loss": 3.9543,
+      "step": 15781
+    },
+    {
+      "epoch": 0.15782,
+      "grad_norm": 1.41598546417389,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 15782
+    },
+    {
+      "epoch": 0.15783,
+      "grad_norm": 1.138196369639764,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 15783
+    },
+    {
+      "epoch": 0.15784,
+      "grad_norm": 1.2565530652564345,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 15784
+    },
+    {
+      "epoch": 0.15785,
+      "grad_norm": 1.1563218182105854,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 15785
+    },
+    {
+      "epoch": 0.15786,
+      "grad_norm": 1.513960434660906,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 15786
+    },
+    {
+      "epoch": 0.15787,
+      "grad_norm": 1.1167315182601023,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 15787
+    },
+    {
+      "epoch": 0.15788,
+      "grad_norm": 1.3727606902010836,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 15788
+    },
+    {
+      "epoch": 0.15789,
+      "grad_norm": 1.205202750661887,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 15789
+    },
+    {
+      "epoch": 0.1579,
+      "grad_norm": 1.2550885048186762,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 15790
+    },
+    {
+      "epoch": 0.15791,
+      "grad_norm": 1.0700136621537168,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 15791
+    },
+    {
+      "epoch": 0.15792,
+      "grad_norm": 1.3657975058002139,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 15792
+    },
+    {
+      "epoch": 0.15793,
+      "grad_norm": 1.3055020328555231,
+      "learning_rate": 0.003,
+      "loss": 4.0486,
+      "step": 15793
+    },
+    {
+      "epoch": 0.15794,
+      "grad_norm": 1.1440251825531926,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 15794
+    },
+    {
+      "epoch": 0.15795,
+      "grad_norm": 1.612311812811049,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 15795
+    },
+    {
+      "epoch": 0.15796,
+      "grad_norm": 0.9678674348930872,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 15796
+    },
+    {
+      "epoch": 0.15797,
+      "grad_norm": 1.3041001041416083,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 15797
+    },
+    {
+      "epoch": 0.15798,
+      "grad_norm": 1.2287015904010035,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 15798
+    },
+    {
+      "epoch": 0.15799,
+      "grad_norm": 1.3510588695396477,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 15799
+    },
+    {
+      "epoch": 0.158,
+      "grad_norm": 1.2788150573835075,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 15800
+    },
+    {
+      "epoch": 0.15801,
+      "grad_norm": 1.0967642729913403,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 15801
+    },
+    {
+      "epoch": 0.15802,
+      "grad_norm": 1.4235868887732133,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 15802
+    },
+    {
+      "epoch": 0.15803,
+      "grad_norm": 1.0357255025069987,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 15803
+    },
+    {
+      "epoch": 0.15804,
+      "grad_norm": 1.3479991991782512,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 15804
+    },
+    {
+      "epoch": 0.15805,
+      "grad_norm": 1.2020102114084525,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 15805
+    },
+    {
+      "epoch": 0.15806,
+      "grad_norm": 1.2822871872220123,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 15806
+    },
+    {
+      "epoch": 0.15807,
+      "grad_norm": 1.1520001903352206,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 15807
+    },
+    {
+      "epoch": 0.15808,
+      "grad_norm": 1.0943066946968263,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 15808
+    },
+    {
+      "epoch": 0.15809,
+      "grad_norm": 1.301562984316627,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 15809
+    },
+    {
+      "epoch": 0.1581,
+      "grad_norm": 1.2776885758676504,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 15810
+    },
+    {
+      "epoch": 0.15811,
+      "grad_norm": 1.2606813015228042,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 15811
+    },
+    {
+      "epoch": 0.15812,
+      "grad_norm": 1.1857629716473423,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 15812
+    },
+    {
+      "epoch": 0.15813,
+      "grad_norm": 1.1636799706327965,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 15813
+    },
+    {
+      "epoch": 0.15814,
+      "grad_norm": 1.0337612537657892,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 15814
+    },
+    {
+      "epoch": 0.15815,
+      "grad_norm": 1.381683836345086,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 15815
+    },
+    {
+      "epoch": 0.15816,
+      "grad_norm": 1.1577125621268765,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 15816
+    },
+    {
+      "epoch": 0.15817,
+      "grad_norm": 1.5654365347116856,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 15817
+    },
+    {
+      "epoch": 0.15818,
+      "grad_norm": 0.9782645089506308,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 15818
+    },
+    {
+      "epoch": 0.15819,
+      "grad_norm": 1.2096831177291703,
+      "learning_rate": 0.003,
+      "loss": 3.9754,
+      "step": 15819
+    },
+    {
+      "epoch": 0.1582,
+      "grad_norm": 1.1564805628697856,
+      "learning_rate": 0.003,
+      "loss": 4.0518,
+      "step": 15820
+    },
+    {
+      "epoch": 0.15821,
+      "grad_norm": 1.263751306335342,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 15821
+    },
+    {
+      "epoch": 0.15822,
+      "grad_norm": 1.2472942820956714,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 15822
+    },
+    {
+      "epoch": 0.15823,
+      "grad_norm": 1.228267008633194,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 15823
+    },
+    {
+      "epoch": 0.15824,
+      "grad_norm": 1.4449510191156791,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 15824
+    },
+    {
+      "epoch": 0.15825,
+      "grad_norm": 1.1035898511648348,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 15825
+    },
+    {
+      "epoch": 0.15826,
+      "grad_norm": 1.5642574174563366,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 15826
+    },
+    {
+      "epoch": 0.15827,
+      "grad_norm": 0.9767703214885841,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 15827
+    },
+    {
+      "epoch": 0.15828,
+      "grad_norm": 1.4108549284549041,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 15828
+    },
+    {
+      "epoch": 0.15829,
+      "grad_norm": 1.377700822184997,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 15829
+    },
+    {
+      "epoch": 0.1583,
+      "grad_norm": 1.4692157047059422,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 15830
+    },
+    {
+      "epoch": 0.15831,
+      "grad_norm": 1.0706932402820692,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 15831
+    },
+    {
+      "epoch": 0.15832,
+      "grad_norm": 1.0949706715343381,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 15832
+    },
+    {
+      "epoch": 0.15833,
+      "grad_norm": 1.3196349499006816,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 15833
+    },
+    {
+      "epoch": 0.15834,
+      "grad_norm": 1.0917848610990246,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 15834
+    },
+    {
+      "epoch": 0.15835,
+      "grad_norm": 1.2928050134155717,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 15835
+    },
+    {
+      "epoch": 0.15836,
+      "grad_norm": 1.1200830203602705,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 15836
+    },
+    {
+      "epoch": 0.15837,
+      "grad_norm": 1.1344965900511697,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 15837
+    },
+    {
+      "epoch": 0.15838,
+      "grad_norm": 1.3132272233496367,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 15838
+    },
+    {
+      "epoch": 0.15839,
+      "grad_norm": 1.0778549547619092,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 15839
+    },
+    {
+      "epoch": 0.1584,
+      "grad_norm": 1.6529929221658228,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 15840
+    },
+    {
+      "epoch": 0.15841,
+      "grad_norm": 1.0816388554578351,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 15841
+    },
+    {
+      "epoch": 0.15842,
+      "grad_norm": 1.4517008573751615,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 15842
+    },
+    {
+      "epoch": 0.15843,
+      "grad_norm": 1.171938801187063,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 15843
+    },
+    {
+      "epoch": 0.15844,
+      "grad_norm": 1.3534559256239043,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 15844
+    },
+    {
+      "epoch": 0.15845,
+      "grad_norm": 1.1834282268056617,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 15845
+    },
+    {
+      "epoch": 0.15846,
+      "grad_norm": 1.305954416737127,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 15846
+    },
+    {
+      "epoch": 0.15847,
+      "grad_norm": 1.0950390047460483,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 15847
+    },
+    {
+      "epoch": 0.15848,
+      "grad_norm": 1.3653974193756213,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 15848
+    },
+    {
+      "epoch": 0.15849,
+      "grad_norm": 1.1928352001024034,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 15849
+    },
+    {
+      "epoch": 0.1585,
+      "grad_norm": 1.3276197716502633,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 15850
+    },
+    {
+      "epoch": 0.15851,
+      "grad_norm": 1.2364669875438297,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 15851
+    },
+    {
+      "epoch": 0.15852,
+      "grad_norm": 1.257690636110746,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 15852
+    },
+    {
+      "epoch": 0.15853,
+      "grad_norm": 1.214967178374091,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 15853
+    },
+    {
+      "epoch": 0.15854,
+      "grad_norm": 1.4023811054543367,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 15854
+    },
+    {
+      "epoch": 0.15855,
+      "grad_norm": 1.1323216534833997,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 15855
+    },
+    {
+      "epoch": 0.15856,
+      "grad_norm": 1.2568759760812227,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 15856
+    },
+    {
+      "epoch": 0.15857,
+      "grad_norm": 1.0837214166550875,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 15857
+    },
+    {
+      "epoch": 0.15858,
+      "grad_norm": 1.136985798690449,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 15858
+    },
+    {
+      "epoch": 0.15859,
+      "grad_norm": 1.1450669993849945,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 15859
+    },
+    {
+      "epoch": 0.1586,
+      "grad_norm": 1.347547922002838,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 15860
+    },
+    {
+      "epoch": 0.15861,
+      "grad_norm": 1.0419008819958953,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 15861
+    },
+    {
+      "epoch": 0.15862,
+      "grad_norm": 1.413079565207084,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 15862
+    },
+    {
+      "epoch": 0.15863,
+      "grad_norm": 1.2743728315364327,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 15863
+    },
+    {
+      "epoch": 0.15864,
+      "grad_norm": 1.3169789757405919,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 15864
+    },
+    {
+      "epoch": 0.15865,
+      "grad_norm": 1.1232260543531478,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 15865
+    },
+    {
+      "epoch": 0.15866,
+      "grad_norm": 1.4253620354108139,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 15866
+    },
+    {
+      "epoch": 0.15867,
+      "grad_norm": 1.3233768560673893,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 15867
+    },
+    {
+      "epoch": 0.15868,
+      "grad_norm": 1.3844891114669013,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 15868
+    },
+    {
+      "epoch": 0.15869,
+      "grad_norm": 1.063792568617661,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 15869
+    },
+    {
+      "epoch": 0.1587,
+      "grad_norm": 1.3356734637445744,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 15870
+    },
+    {
+      "epoch": 0.15871,
+      "grad_norm": 1.269230312806111,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 15871
+    },
+    {
+      "epoch": 0.15872,
+      "grad_norm": 1.5384143518783961,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 15872
+    },
+    {
+      "epoch": 0.15873,
+      "grad_norm": 1.107342231744455,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 15873
+    },
+    {
+      "epoch": 0.15874,
+      "grad_norm": 1.1973264901705576,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 15874
+    },
+    {
+      "epoch": 0.15875,
+      "grad_norm": 1.2196899595513655,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 15875
+    },
+    {
+      "epoch": 0.15876,
+      "grad_norm": 1.3007578819437007,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 15876
+    },
+    {
+      "epoch": 0.15877,
+      "grad_norm": 1.119209826200725,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 15877
+    },
+    {
+      "epoch": 0.15878,
+      "grad_norm": 1.4334426482819758,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 15878
+    },
+    {
+      "epoch": 0.15879,
+      "grad_norm": 1.067441178690247,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 15879
+    },
+    {
+      "epoch": 0.1588,
+      "grad_norm": 1.3339181131879854,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 15880
+    },
+    {
+      "epoch": 0.15881,
+      "grad_norm": 1.073006918871515,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 15881
+    },
+    {
+      "epoch": 0.15882,
+      "grad_norm": 1.1485741696188854,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 15882
+    },
+    {
+      "epoch": 0.15883,
+      "grad_norm": 1.2806079384299174,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 15883
+    },
+    {
+      "epoch": 0.15884,
+      "grad_norm": 1.230772825912422,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 15884
+    },
+    {
+      "epoch": 0.15885,
+      "grad_norm": 1.3410068056507027,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 15885
+    },
+    {
+      "epoch": 0.15886,
+      "grad_norm": 1.1937651506835003,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 15886
+    },
+    {
+      "epoch": 0.15887,
+      "grad_norm": 1.3281232831561387,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 15887
+    },
+    {
+      "epoch": 0.15888,
+      "grad_norm": 1.3357080580184066,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 15888
+    },
+    {
+      "epoch": 0.15889,
+      "grad_norm": 1.450221699192516,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 15889
+    },
+    {
+      "epoch": 0.1589,
+      "grad_norm": 1.2063283535601301,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 15890
+    },
+    {
+      "epoch": 0.15891,
+      "grad_norm": 1.422104119354722,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 15891
+    },
+    {
+      "epoch": 0.15892,
+      "grad_norm": 1.1148680025652393,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 15892
+    },
+    {
+      "epoch": 0.15893,
+      "grad_norm": 1.2580714382446376,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 15893
+    },
+    {
+      "epoch": 0.15894,
+      "grad_norm": 1.1380887118267102,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 15894
+    },
+    {
+      "epoch": 0.15895,
+      "grad_norm": 1.4597095195310927,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 15895
+    },
+    {
+      "epoch": 0.15896,
+      "grad_norm": 1.126224181908572,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 15896
+    },
+    {
+      "epoch": 0.15897,
+      "grad_norm": 1.2223812256140263,
+      "learning_rate": 0.003,
+      "loss": 3.9775,
+      "step": 15897
+    },
+    {
+      "epoch": 0.15898,
+      "grad_norm": 1.270821791198787,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 15898
+    },
+    {
+      "epoch": 0.15899,
+      "grad_norm": 1.5603404791555529,
+      "learning_rate": 0.003,
+      "loss": 3.9812,
+      "step": 15899
+    },
+    {
+      "epoch": 0.159,
+      "grad_norm": 1.3936087745617098,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 15900
+    },
+    {
+      "epoch": 0.15901,
+      "grad_norm": 1.0934864565818274,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 15901
+    },
+    {
+      "epoch": 0.15902,
+      "grad_norm": 1.1888037236970748,
+      "learning_rate": 0.003,
+      "loss": 4.0504,
+      "step": 15902
+    },
+    {
+      "epoch": 0.15903,
+      "grad_norm": 1.2600477409961077,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 15903
+    },
+    {
+      "epoch": 0.15904,
+      "grad_norm": 1.0810865586160259,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 15904
+    },
+    {
+      "epoch": 0.15905,
+      "grad_norm": 1.3802911177096677,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 15905
+    },
+    {
+      "epoch": 0.15906,
+      "grad_norm": 1.1233192397248655,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 15906
+    },
+    {
+      "epoch": 0.15907,
+      "grad_norm": 1.3272323602114968,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 15907
+    },
+    {
+      "epoch": 0.15908,
+      "grad_norm": 1.0506256131069907,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 15908
+    },
+    {
+      "epoch": 0.15909,
+      "grad_norm": 1.2115197839476954,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 15909
+    },
+    {
+      "epoch": 0.1591,
+      "grad_norm": 1.2868681085818645,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 15910
+    },
+    {
+      "epoch": 0.15911,
+      "grad_norm": 1.0897951917112798,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 15911
+    },
+    {
+      "epoch": 0.15912,
+      "grad_norm": 1.3936606217052407,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 15912
+    },
+    {
+      "epoch": 0.15913,
+      "grad_norm": 1.1805285692118155,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 15913
+    },
+    {
+      "epoch": 0.15914,
+      "grad_norm": 1.5115259210744625,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 15914
+    },
+    {
+      "epoch": 0.15915,
+      "grad_norm": 0.976486704028311,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 15915
+    },
+    {
+      "epoch": 0.15916,
+      "grad_norm": 1.5681460695365494,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 15916
+    },
+    {
+      "epoch": 0.15917,
+      "grad_norm": 1.1470059509439832,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 15917
+    },
+    {
+      "epoch": 0.15918,
+      "grad_norm": 1.3069453710337169,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 15918
+    },
+    {
+      "epoch": 0.15919,
+      "grad_norm": 1.0735072095383267,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 15919
+    },
+    {
+      "epoch": 0.1592,
+      "grad_norm": 1.2391610510497388,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 15920
+    },
+    {
+      "epoch": 0.15921,
+      "grad_norm": 1.0875601848350542,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 15921
+    },
+    {
+      "epoch": 0.15922,
+      "grad_norm": 1.4961613661364455,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 15922
+    },
+    {
+      "epoch": 0.15923,
+      "grad_norm": 1.198264842703091,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 15923
+    },
+    {
+      "epoch": 0.15924,
+      "grad_norm": 1.251445635848106,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 15924
+    },
+    {
+      "epoch": 0.15925,
+      "grad_norm": 1.0844267077040648,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 15925
+    },
+    {
+      "epoch": 0.15926,
+      "grad_norm": 1.5594659925155585,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 15926
+    },
+    {
+      "epoch": 0.15927,
+      "grad_norm": 1.0809068769756605,
+      "learning_rate": 0.003,
+      "loss": 3.98,
+      "step": 15927
+    },
+    {
+      "epoch": 0.15928,
+      "grad_norm": 1.4555096692438068,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 15928
+    },
+    {
+      "epoch": 0.15929,
+      "grad_norm": 1.1285997467928173,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 15929
+    },
+    {
+      "epoch": 0.1593,
+      "grad_norm": 1.3324282147057867,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 15930
+    },
+    {
+      "epoch": 0.15931,
+      "grad_norm": 1.2380611666099195,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 15931
+    },
+    {
+      "epoch": 0.15932,
+      "grad_norm": 1.1506683674394433,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 15932
+    },
+    {
+      "epoch": 0.15933,
+      "grad_norm": 1.2834021241100766,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 15933
+    },
+    {
+      "epoch": 0.15934,
+      "grad_norm": 1.2501225263853923,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 15934
+    },
+    {
+      "epoch": 0.15935,
+      "grad_norm": 1.2661937164721981,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 15935
+    },
+    {
+      "epoch": 0.15936,
+      "grad_norm": 1.2736002273731495,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 15936
+    },
+    {
+      "epoch": 0.15937,
+      "grad_norm": 1.064288489544132,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 15937
+    },
+    {
+      "epoch": 0.15938,
+      "grad_norm": 1.2084424600212056,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 15938
+    },
+    {
+      "epoch": 0.15939,
+      "grad_norm": 1.3309803366665889,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 15939
+    },
+    {
+      "epoch": 0.1594,
+      "grad_norm": 1.032362097892044,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 15940
+    },
+    {
+      "epoch": 0.15941,
+      "grad_norm": 1.2671311295721919,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 15941
+    },
+    {
+      "epoch": 0.15942,
+      "grad_norm": 1.0980979707465504,
+      "learning_rate": 0.003,
+      "loss": 3.974,
+      "step": 15942
+    },
+    {
+      "epoch": 0.15943,
+      "grad_norm": 1.2788912070867908,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 15943
+    },
+    {
+      "epoch": 0.15944,
+      "grad_norm": 1.3409571028024285,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 15944
+    },
+    {
+      "epoch": 0.15945,
+      "grad_norm": 1.250160402820269,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 15945
+    },
+    {
+      "epoch": 0.15946,
+      "grad_norm": 1.084368530449594,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 15946
+    },
+    {
+      "epoch": 0.15947,
+      "grad_norm": 1.3489970350127387,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 15947
+    },
+    {
+      "epoch": 0.15948,
+      "grad_norm": 1.2994259205767582,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 15948
+    },
+    {
+      "epoch": 0.15949,
+      "grad_norm": 1.2973830305893017,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 15949
+    },
+    {
+      "epoch": 0.1595,
+      "grad_norm": 1.2073403007883003,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 15950
+    },
+    {
+      "epoch": 0.15951,
+      "grad_norm": 1.2077527594878215,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 15951
+    },
+    {
+      "epoch": 0.15952,
+      "grad_norm": 1.1281748449482183,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 15952
+    },
+    {
+      "epoch": 0.15953,
+      "grad_norm": 1.2386195576290036,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 15953
+    },
+    {
+      "epoch": 0.15954,
+      "grad_norm": 1.3117440639263585,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 15954
+    },
+    {
+      "epoch": 0.15955,
+      "grad_norm": 1.1144295795884482,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 15955
+    },
+    {
+      "epoch": 0.15956,
+      "grad_norm": 1.3513662104221202,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 15956
+    },
+    {
+      "epoch": 0.15957,
+      "grad_norm": 1.1644768303251782,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 15957
+    },
+    {
+      "epoch": 0.15958,
+      "grad_norm": 1.5178654834472558,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 15958
+    },
+    {
+      "epoch": 0.15959,
+      "grad_norm": 1.009003208684371,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 15959
+    },
+    {
+      "epoch": 0.1596,
+      "grad_norm": 1.747584605838126,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 15960
+    },
+    {
+      "epoch": 0.15961,
+      "grad_norm": 1.2904165960056244,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 15961
+    },
+    {
+      "epoch": 0.15962,
+      "grad_norm": 1.2051900969929517,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 15962
+    },
+    {
+      "epoch": 0.15963,
+      "grad_norm": 1.3437324584870094,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 15963
+    },
+    {
+      "epoch": 0.15964,
+      "grad_norm": 1.0610913888424898,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 15964
+    },
+    {
+      "epoch": 0.15965,
+      "grad_norm": 1.1370997544613122,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 15965
+    },
+    {
+      "epoch": 0.15966,
+      "grad_norm": 1.311349509904151,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 15966
+    },
+    {
+      "epoch": 0.15967,
+      "grad_norm": 1.2046874971861148,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 15967
+    },
+    {
+      "epoch": 0.15968,
+      "grad_norm": 1.32732408381076,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 15968
+    },
+    {
+      "epoch": 0.15969,
+      "grad_norm": 1.0236651951170457,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 15969
+    },
+    {
+      "epoch": 0.1597,
+      "grad_norm": 1.2998798579086002,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 15970
+    },
+    {
+      "epoch": 0.15971,
+      "grad_norm": 1.059402665772283,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 15971
+    },
+    {
+      "epoch": 0.15972,
+      "grad_norm": 1.2886639535877384,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 15972
+    },
+    {
+      "epoch": 0.15973,
+      "grad_norm": 1.4074638650887707,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 15973
+    },
+    {
+      "epoch": 0.15974,
+      "grad_norm": 1.034321743162951,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 15974
+    },
+    {
+      "epoch": 0.15975,
+      "grad_norm": 1.5559879296046673,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 15975
+    },
+    {
+      "epoch": 0.15976,
+      "grad_norm": 0.9732904162293462,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 15976
+    },
+    {
+      "epoch": 0.15977,
+      "grad_norm": 1.2254248219761825,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 15977
+    },
+    {
+      "epoch": 0.15978,
+      "grad_norm": 1.1405023127368015,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 15978
+    },
+    {
+      "epoch": 0.15979,
+      "grad_norm": 1.3818040951816377,
+      "learning_rate": 0.003,
+      "loss": 3.9875,
+      "step": 15979
+    },
+    {
+      "epoch": 0.1598,
+      "grad_norm": 1.1749784241455272,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 15980
+    },
+    {
+      "epoch": 0.15981,
+      "grad_norm": 1.4577035816327795,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 15981
+    },
+    {
+      "epoch": 0.15982,
+      "grad_norm": 1.23743809212499,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 15982
+    },
+    {
+      "epoch": 0.15983,
+      "grad_norm": 1.3583355235631573,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 15983
+    },
+    {
+      "epoch": 0.15984,
+      "grad_norm": 1.1240289417059142,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 15984
+    },
+    {
+      "epoch": 0.15985,
+      "grad_norm": 1.4041849938566662,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 15985
+    },
+    {
+      "epoch": 0.15986,
+      "grad_norm": 1.3684400747095355,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 15986
+    },
+    {
+      "epoch": 0.15987,
+      "grad_norm": 1.210295400952199,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 15987
+    },
+    {
+      "epoch": 0.15988,
+      "grad_norm": 1.557409213131596,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 15988
+    },
+    {
+      "epoch": 0.15989,
+      "grad_norm": 1.2080860727995004,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 15989
+    },
+    {
+      "epoch": 0.1599,
+      "grad_norm": 1.12769210010326,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 15990
+    },
+    {
+      "epoch": 0.15991,
+      "grad_norm": 1.3193033992992307,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 15991
+    },
+    {
+      "epoch": 0.15992,
+      "grad_norm": 1.1530373535937724,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 15992
+    },
+    {
+      "epoch": 0.15993,
+      "grad_norm": 1.3786125097767274,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 15993
+    },
+    {
+      "epoch": 0.15994,
+      "grad_norm": 1.01651306819352,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 15994
+    },
+    {
+      "epoch": 0.15995,
+      "grad_norm": 1.4312493173148968,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 15995
+    },
+    {
+      "epoch": 0.15996,
+      "grad_norm": 0.9999668997559916,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 15996
+    },
+    {
+      "epoch": 0.15997,
+      "grad_norm": 1.3035104354159652,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 15997
+    },
+    {
+      "epoch": 0.15998,
+      "grad_norm": 1.100398140602815,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 15998
+    },
+    {
+      "epoch": 0.15999,
+      "grad_norm": 1.3803552999940325,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 15999
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 1.1605678043519492,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 16000
+    },
+    {
+      "epoch": 0.16001,
+      "grad_norm": 1.2404721333235633,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 16001
+    },
+    {
+      "epoch": 0.16002,
+      "grad_norm": 1.2338390822554808,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 16002
+    },
+    {
+      "epoch": 0.16003,
+      "grad_norm": 1.1938666541910892,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 16003
+    },
+    {
+      "epoch": 0.16004,
+      "grad_norm": 1.2229940449205243,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 16004
+    },
+    {
+      "epoch": 0.16005,
+      "grad_norm": 1.5471699933630838,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 16005
+    },
+    {
+      "epoch": 0.16006,
+      "grad_norm": 1.1594371365697964,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 16006
+    },
+    {
+      "epoch": 0.16007,
+      "grad_norm": 1.5214616998953365,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 16007
+    },
+    {
+      "epoch": 0.16008,
+      "grad_norm": 1.134113177523023,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 16008
+    },
+    {
+      "epoch": 0.16009,
+      "grad_norm": 1.2947310732024733,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 16009
+    },
+    {
+      "epoch": 0.1601,
+      "grad_norm": 1.3787130295348902,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 16010
+    },
+    {
+      "epoch": 0.16011,
+      "grad_norm": 1.2233450040254008,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 16011
+    },
+    {
+      "epoch": 0.16012,
+      "grad_norm": 1.188757317575267,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 16012
+    },
+    {
+      "epoch": 0.16013,
+      "grad_norm": 1.2802531708167952,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 16013
+    },
+    {
+      "epoch": 0.16014,
+      "grad_norm": 1.3036325770857242,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 16014
+    },
+    {
+      "epoch": 0.16015,
+      "grad_norm": 1.3075081535296704,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 16015
+    },
+    {
+      "epoch": 0.16016,
+      "grad_norm": 1.2868315758360842,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 16016
+    },
+    {
+      "epoch": 0.16017,
+      "grad_norm": 1.2192453725004495,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 16017
+    },
+    {
+      "epoch": 0.16018,
+      "grad_norm": 1.4179947259720587,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 16018
+    },
+    {
+      "epoch": 0.16019,
+      "grad_norm": 0.9933268676060425,
+      "learning_rate": 0.003,
+      "loss": 3.972,
+      "step": 16019
+    },
+    {
+      "epoch": 0.1602,
+      "grad_norm": 1.2677574055498495,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 16020
+    },
+    {
+      "epoch": 0.16021,
+      "grad_norm": 1.04914501921573,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 16021
+    },
+    {
+      "epoch": 0.16022,
+      "grad_norm": 1.5658787036199897,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 16022
+    },
+    {
+      "epoch": 0.16023,
+      "grad_norm": 0.9380166563877902,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 16023
+    },
+    {
+      "epoch": 0.16024,
+      "grad_norm": 1.3563993540979358,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 16024
+    },
+    {
+      "epoch": 0.16025,
+      "grad_norm": 1.2223720877097546,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 16025
+    },
+    {
+      "epoch": 0.16026,
+      "grad_norm": 1.3958468360337928,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 16026
+    },
+    {
+      "epoch": 0.16027,
+      "grad_norm": 1.1496040425392546,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 16027
+    },
+    {
+      "epoch": 0.16028,
+      "grad_norm": 1.265978581286196,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 16028
+    },
+    {
+      "epoch": 0.16029,
+      "grad_norm": 1.1970656146860956,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 16029
+    },
+    {
+      "epoch": 0.1603,
+      "grad_norm": 1.3719632920241611,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 16030
+    },
+    {
+      "epoch": 0.16031,
+      "grad_norm": 1.4888979743666881,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 16031
+    },
+    {
+      "epoch": 0.16032,
+      "grad_norm": 0.9405753892538025,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 16032
+    },
+    {
+      "epoch": 0.16033,
+      "grad_norm": 1.3972562621926694,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 16033
+    },
+    {
+      "epoch": 0.16034,
+      "grad_norm": 1.2076091171066374,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 16034
+    },
+    {
+      "epoch": 0.16035,
+      "grad_norm": 1.2152831213427373,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 16035
+    },
+    {
+      "epoch": 0.16036,
+      "grad_norm": 1.323796096636007,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 16036
+    },
+    {
+      "epoch": 0.16037,
+      "grad_norm": 1.0753704479667336,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 16037
+    },
+    {
+      "epoch": 0.16038,
+      "grad_norm": 1.1599005027463496,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 16038
+    },
+    {
+      "epoch": 0.16039,
+      "grad_norm": 1.2709321988871976,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 16039
+    },
+    {
+      "epoch": 0.1604,
+      "grad_norm": 1.3670081025524143,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 16040
+    },
+    {
+      "epoch": 0.16041,
+      "grad_norm": 0.9935916137257558,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 16041
+    },
+    {
+      "epoch": 0.16042,
+      "grad_norm": 1.1991884036840277,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 16042
+    },
+    {
+      "epoch": 0.16043,
+      "grad_norm": 1.2122572540792345,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 16043
+    },
+    {
+      "epoch": 0.16044,
+      "grad_norm": 1.051149596266568,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 16044
+    },
+    {
+      "epoch": 0.16045,
+      "grad_norm": 1.2696961833905007,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 16045
+    },
+    {
+      "epoch": 0.16046,
+      "grad_norm": 1.136441039790602,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 16046
+    },
+    {
+      "epoch": 0.16047,
+      "grad_norm": 1.2746520841443807,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 16047
+    },
+    {
+      "epoch": 0.16048,
+      "grad_norm": 1.3310326798956609,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 16048
+    },
+    {
+      "epoch": 0.16049,
+      "grad_norm": 1.1906862885352336,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 16049
+    },
+    {
+      "epoch": 0.1605,
+      "grad_norm": 1.4284071734558204,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 16050
+    },
+    {
+      "epoch": 0.16051,
+      "grad_norm": 1.1554474651545776,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 16051
+    },
+    {
+      "epoch": 0.16052,
+      "grad_norm": 1.6483568951641105,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 16052
+    },
+    {
+      "epoch": 0.16053,
+      "grad_norm": 1.050790920334589,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 16053
+    },
+    {
+      "epoch": 0.16054,
+      "grad_norm": 1.1913184336153997,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 16054
+    },
+    {
+      "epoch": 0.16055,
+      "grad_norm": 1.299799165461864,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 16055
+    },
+    {
+      "epoch": 0.16056,
+      "grad_norm": 1.2197434747744236,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 16056
+    },
+    {
+      "epoch": 0.16057,
+      "grad_norm": 1.3485521233853122,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 16057
+    },
+    {
+      "epoch": 0.16058,
+      "grad_norm": 1.2529999632379034,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 16058
+    },
+    {
+      "epoch": 0.16059,
+      "grad_norm": 1.2757609779826649,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 16059
+    },
+    {
+      "epoch": 0.1606,
+      "grad_norm": 1.2624204085329382,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 16060
+    },
+    {
+      "epoch": 0.16061,
+      "grad_norm": 1.01916293730141,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 16061
+    },
+    {
+      "epoch": 0.16062,
+      "grad_norm": 1.3918647677121883,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 16062
+    },
+    {
+      "epoch": 0.16063,
+      "grad_norm": 1.1299985681926117,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 16063
+    },
+    {
+      "epoch": 0.16064,
+      "grad_norm": 1.3989755290151158,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 16064
+    },
+    {
+      "epoch": 0.16065,
+      "grad_norm": 1.1444329874813979,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 16065
+    },
+    {
+      "epoch": 0.16066,
+      "grad_norm": 1.2769218025740188,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 16066
+    },
+    {
+      "epoch": 0.16067,
+      "grad_norm": 1.2657726384704442,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 16067
+    },
+    {
+      "epoch": 0.16068,
+      "grad_norm": 1.3584528947680767,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 16068
+    },
+    {
+      "epoch": 0.16069,
+      "grad_norm": 1.3849230623983817,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 16069
+    },
+    {
+      "epoch": 0.1607,
+      "grad_norm": 1.1612567846070265,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 16070
+    },
+    {
+      "epoch": 0.16071,
+      "grad_norm": 0.9850898731815557,
+      "learning_rate": 0.003,
+      "loss": 3.9801,
+      "step": 16071
+    },
+    {
+      "epoch": 0.16072,
+      "grad_norm": 1.3979078076442122,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 16072
+    },
+    {
+      "epoch": 0.16073,
+      "grad_norm": 1.1829207947805287,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 16073
+    },
+    {
+      "epoch": 0.16074,
+      "grad_norm": 1.3956568785676735,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 16074
+    },
+    {
+      "epoch": 0.16075,
+      "grad_norm": 1.2601557349528247,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 16075
+    },
+    {
+      "epoch": 0.16076,
+      "grad_norm": 1.175950105019326,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 16076
+    },
+    {
+      "epoch": 0.16077,
+      "grad_norm": 1.2399943688853072,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 16077
+    },
+    {
+      "epoch": 0.16078,
+      "grad_norm": 1.38283249365418,
+      "learning_rate": 0.003,
+      "loss": 3.9812,
+      "step": 16078
+    },
+    {
+      "epoch": 0.16079,
+      "grad_norm": 1.0971987983802745,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 16079
+    },
+    {
+      "epoch": 0.1608,
+      "grad_norm": 1.300851282510104,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 16080
+    },
+    {
+      "epoch": 0.16081,
+      "grad_norm": 1.095857488943779,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 16081
+    },
+    {
+      "epoch": 0.16082,
+      "grad_norm": 1.299630758811905,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 16082
+    },
+    {
+      "epoch": 0.16083,
+      "grad_norm": 1.0260005769947862,
+      "learning_rate": 0.003,
+      "loss": 3.9847,
+      "step": 16083
+    },
+    {
+      "epoch": 0.16084,
+      "grad_norm": 1.3001667629050337,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 16084
+    },
+    {
+      "epoch": 0.16085,
+      "grad_norm": 1.4688412215739906,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 16085
+    },
+    {
+      "epoch": 0.16086,
+      "grad_norm": 1.1117184144622974,
+      "learning_rate": 0.003,
+      "loss": 3.9839,
+      "step": 16086
+    },
+    {
+      "epoch": 0.16087,
+      "grad_norm": 1.370685907343411,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 16087
+    },
+    {
+      "epoch": 0.16088,
+      "grad_norm": 1.202322765052148,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 16088
+    },
+    {
+      "epoch": 0.16089,
+      "grad_norm": 1.2356402782688343,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 16089
+    },
+    {
+      "epoch": 0.1609,
+      "grad_norm": 1.1182959162689072,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 16090
+    },
+    {
+      "epoch": 0.16091,
+      "grad_norm": 1.2642362698117995,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 16091
+    },
+    {
+      "epoch": 0.16092,
+      "grad_norm": 1.2352144019840015,
+      "learning_rate": 0.003,
+      "loss": 3.9797,
+      "step": 16092
+    },
+    {
+      "epoch": 0.16093,
+      "grad_norm": 1.076256655348046,
+      "learning_rate": 0.003,
+      "loss": 3.9722,
+      "step": 16093
+    },
+    {
+      "epoch": 0.16094,
+      "grad_norm": 1.3548711640288833,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 16094
+    },
+    {
+      "epoch": 0.16095,
+      "grad_norm": 1.5575501903390845,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 16095
+    },
+    {
+      "epoch": 0.16096,
+      "grad_norm": 0.9637468052786332,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 16096
+    },
+    {
+      "epoch": 0.16097,
+      "grad_norm": 1.4523002755247842,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 16097
+    },
+    {
+      "epoch": 0.16098,
+      "grad_norm": 1.0730202043900385,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 16098
+    },
+    {
+      "epoch": 0.16099,
+      "grad_norm": 1.3524062057099446,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 16099
+    },
+    {
+      "epoch": 0.161,
+      "grad_norm": 1.1546396738849403,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 16100
+    },
+    {
+      "epoch": 0.16101,
+      "grad_norm": 1.5156718082592469,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 16101
+    },
+    {
+      "epoch": 0.16102,
+      "grad_norm": 1.1864275339496917,
+      "learning_rate": 0.003,
+      "loss": 3.9771,
+      "step": 16102
+    },
+    {
+      "epoch": 0.16103,
+      "grad_norm": 1.15265366212102,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 16103
+    },
+    {
+      "epoch": 0.16104,
+      "grad_norm": 1.3678819136882205,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 16104
+    },
+    {
+      "epoch": 0.16105,
+      "grad_norm": 1.1852836254515984,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 16105
+    },
+    {
+      "epoch": 0.16106,
+      "grad_norm": 1.2642708883562752,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 16106
+    },
+    {
+      "epoch": 0.16107,
+      "grad_norm": 1.4118138366225041,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 16107
+    },
+    {
+      "epoch": 0.16108,
+      "grad_norm": 1.2442807137062035,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 16108
+    },
+    {
+      "epoch": 0.16109,
+      "grad_norm": 1.3907189421395418,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 16109
+    },
+    {
+      "epoch": 0.1611,
+      "grad_norm": 1.3454169707594963,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 16110
+    },
+    {
+      "epoch": 0.16111,
+      "grad_norm": 1.1334365616559023,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 16111
+    },
+    {
+      "epoch": 0.16112,
+      "grad_norm": 1.223835544837327,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 16112
+    },
+    {
+      "epoch": 0.16113,
+      "grad_norm": 1.1003578422714257,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 16113
+    },
+    {
+      "epoch": 0.16114,
+      "grad_norm": 1.422400823040409,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 16114
+    },
+    {
+      "epoch": 0.16115,
+      "grad_norm": 1.0493058061084093,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 16115
+    },
+    {
+      "epoch": 0.16116,
+      "grad_norm": 1.3919970554520462,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 16116
+    },
+    {
+      "epoch": 0.16117,
+      "grad_norm": 1.0525169085581814,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 16117
+    },
+    {
+      "epoch": 0.16118,
+      "grad_norm": 1.5199701224654893,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 16118
+    },
+    {
+      "epoch": 0.16119,
+      "grad_norm": 1.1893543622240004,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 16119
+    },
+    {
+      "epoch": 0.1612,
+      "grad_norm": 1.481317893128953,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 16120
+    },
+    {
+      "epoch": 0.16121,
+      "grad_norm": 1.3689173482711632,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 16121
+    },
+    {
+      "epoch": 0.16122,
+      "grad_norm": 1.3670484911413183,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 16122
+    },
+    {
+      "epoch": 0.16123,
+      "grad_norm": 1.2552896372472673,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 16123
+    },
+    {
+      "epoch": 0.16124,
+      "grad_norm": 1.3335344033573147,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 16124
+    },
+    {
+      "epoch": 0.16125,
+      "grad_norm": 1.1700116034864976,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 16125
+    },
+    {
+      "epoch": 0.16126,
+      "grad_norm": 1.1471011013024555,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 16126
+    },
+    {
+      "epoch": 0.16127,
+      "grad_norm": 1.1549395887326956,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 16127
+    },
+    {
+      "epoch": 0.16128,
+      "grad_norm": 1.189703932631664,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 16128
+    },
+    {
+      "epoch": 0.16129,
+      "grad_norm": 1.3082653225319467,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 16129
+    },
+    {
+      "epoch": 0.1613,
+      "grad_norm": 1.2416398957273194,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 16130
+    },
+    {
+      "epoch": 0.16131,
+      "grad_norm": 1.4663947896272986,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 16131
+    },
+    {
+      "epoch": 0.16132,
+      "grad_norm": 1.2135324744775915,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 16132
+    },
+    {
+      "epoch": 0.16133,
+      "grad_norm": 1.1562967314921637,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 16133
+    },
+    {
+      "epoch": 0.16134,
+      "grad_norm": 1.1763928022748928,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 16134
+    },
+    {
+      "epoch": 0.16135,
+      "grad_norm": 1.2976195079820243,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 16135
+    },
+    {
+      "epoch": 0.16136,
+      "grad_norm": 1.1471018842279348,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 16136
+    },
+    {
+      "epoch": 0.16137,
+      "grad_norm": 1.2413117497347381,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 16137
+    },
+    {
+      "epoch": 0.16138,
+      "grad_norm": 0.9923693305364467,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 16138
+    },
+    {
+      "epoch": 0.16139,
+      "grad_norm": 1.4381837952111263,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 16139
+    },
+    {
+      "epoch": 0.1614,
+      "grad_norm": 0.9472733316173573,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 16140
+    },
+    {
+      "epoch": 0.16141,
+      "grad_norm": 1.33400120176943,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 16141
+    },
+    {
+      "epoch": 0.16142,
+      "grad_norm": 1.2127378779303366,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 16142
+    },
+    {
+      "epoch": 0.16143,
+      "grad_norm": 1.4219686336200243,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 16143
+    },
+    {
+      "epoch": 0.16144,
+      "grad_norm": 1.39719751895515,
+      "learning_rate": 0.003,
+      "loss": 4.0514,
+      "step": 16144
+    },
+    {
+      "epoch": 0.16145,
+      "grad_norm": 1.1694230089176398,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 16145
+    },
+    {
+      "epoch": 0.16146,
+      "grad_norm": 1.3097712187358037,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 16146
+    },
+    {
+      "epoch": 0.16147,
+      "grad_norm": 1.1227197415916592,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 16147
+    },
+    {
+      "epoch": 0.16148,
+      "grad_norm": 1.1213560161179337,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 16148
+    },
+    {
+      "epoch": 0.16149,
+      "grad_norm": 1.3743437454563598,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 16149
+    },
+    {
+      "epoch": 0.1615,
+      "grad_norm": 1.2906143546730646,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 16150
+    },
+    {
+      "epoch": 0.16151,
+      "grad_norm": 1.3214154585018147,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 16151
+    },
+    {
+      "epoch": 0.16152,
+      "grad_norm": 1.0230395562856147,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 16152
+    },
+    {
+      "epoch": 0.16153,
+      "grad_norm": 1.3229402182409677,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 16153
+    },
+    {
+      "epoch": 0.16154,
+      "grad_norm": 1.2864381791192716,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 16154
+    },
+    {
+      "epoch": 0.16155,
+      "grad_norm": 1.0760908943022593,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 16155
+    },
+    {
+      "epoch": 0.16156,
+      "grad_norm": 1.2856867476042058,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 16156
+    },
+    {
+      "epoch": 0.16157,
+      "grad_norm": 1.105141525818919,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 16157
+    },
+    {
+      "epoch": 0.16158,
+      "grad_norm": 1.3770087818300023,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 16158
+    },
+    {
+      "epoch": 0.16159,
+      "grad_norm": 1.0357188749854245,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 16159
+    },
+    {
+      "epoch": 0.1616,
+      "grad_norm": 1.4336389701380712,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 16160
+    },
+    {
+      "epoch": 0.16161,
+      "grad_norm": 1.2736841522738496,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 16161
+    },
+    {
+      "epoch": 0.16162,
+      "grad_norm": 1.2190076877210108,
+      "learning_rate": 0.003,
+      "loss": 3.9845,
+      "step": 16162
+    },
+    {
+      "epoch": 0.16163,
+      "grad_norm": 1.1169781715691094,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 16163
+    },
+    {
+      "epoch": 0.16164,
+      "grad_norm": 1.2873936761081572,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 16164
+    },
+    {
+      "epoch": 0.16165,
+      "grad_norm": 1.0721605161117134,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 16165
+    },
+    {
+      "epoch": 0.16166,
+      "grad_norm": 1.5210271055832423,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 16166
+    },
+    {
+      "epoch": 0.16167,
+      "grad_norm": 1.2603117087981106,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 16167
+    },
+    {
+      "epoch": 0.16168,
+      "grad_norm": 1.574483052352801,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 16168
+    },
+    {
+      "epoch": 0.16169,
+      "grad_norm": 0.8664294582071033,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 16169
+    },
+    {
+      "epoch": 0.1617,
+      "grad_norm": 0.9914409134469694,
+      "learning_rate": 0.003,
+      "loss": 3.9762,
+      "step": 16170
+    },
+    {
+      "epoch": 0.16171,
+      "grad_norm": 1.4567984425664773,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 16171
+    },
+    {
+      "epoch": 0.16172,
+      "grad_norm": 1.2118153987214155,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 16172
+    },
+    {
+      "epoch": 0.16173,
+      "grad_norm": 1.1642355951845538,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 16173
+    },
+    {
+      "epoch": 0.16174,
+      "grad_norm": 1.277516708904266,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 16174
+    },
+    {
+      "epoch": 0.16175,
+      "grad_norm": 1.1793587223376114,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 16175
+    },
+    {
+      "epoch": 0.16176,
+      "grad_norm": 1.3705621332644309,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 16176
+    },
+    {
+      "epoch": 0.16177,
+      "grad_norm": 0.9276949023792542,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 16177
+    },
+    {
+      "epoch": 0.16178,
+      "grad_norm": 1.41636566857606,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 16178
+    },
+    {
+      "epoch": 0.16179,
+      "grad_norm": 1.0679985032966415,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 16179
+    },
+    {
+      "epoch": 0.1618,
+      "grad_norm": 1.5060257983082257,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 16180
+    },
+    {
+      "epoch": 0.16181,
+      "grad_norm": 1.0021990238480751,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 16181
+    },
+    {
+      "epoch": 0.16182,
+      "grad_norm": 1.3496364921628858,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 16182
+    },
+    {
+      "epoch": 0.16183,
+      "grad_norm": 1.3215118120120168,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 16183
+    },
+    {
+      "epoch": 0.16184,
+      "grad_norm": 0.9261056839001097,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 16184
+    },
+    {
+      "epoch": 0.16185,
+      "grad_norm": 1.2410333950635242,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 16185
+    },
+    {
+      "epoch": 0.16186,
+      "grad_norm": 1.2122607787387762,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 16186
+    },
+    {
+      "epoch": 0.16187,
+      "grad_norm": 1.074138998508649,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 16187
+    },
+    {
+      "epoch": 0.16188,
+      "grad_norm": 1.3439154528218888,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 16188
+    },
+    {
+      "epoch": 0.16189,
+      "grad_norm": 1.2327094920539634,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 16189
+    },
+    {
+      "epoch": 0.1619,
+      "grad_norm": 1.2945842411187498,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 16190
+    },
+    {
+      "epoch": 0.16191,
+      "grad_norm": 1.1839140649632616,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 16191
+    },
+    {
+      "epoch": 0.16192,
+      "grad_norm": 1.3758659634642418,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 16192
+    },
+    {
+      "epoch": 0.16193,
+      "grad_norm": 1.5754222985713842,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 16193
+    },
+    {
+      "epoch": 0.16194,
+      "grad_norm": 1.168003610994215,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 16194
+    },
+    {
+      "epoch": 0.16195,
+      "grad_norm": 1.2598666586763887,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 16195
+    },
+    {
+      "epoch": 0.16196,
+      "grad_norm": 1.2484636707932206,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 16196
+    },
+    {
+      "epoch": 0.16197,
+      "grad_norm": 1.120079978131868,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 16197
+    },
+    {
+      "epoch": 0.16198,
+      "grad_norm": 1.4012787009699093,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 16198
+    },
+    {
+      "epoch": 0.16199,
+      "grad_norm": 1.240228254694417,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 16199
+    },
+    {
+      "epoch": 0.162,
+      "grad_norm": 1.1772413535073507,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 16200
+    },
+    {
+      "epoch": 0.16201,
+      "grad_norm": 1.2900263087529662,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 16201
+    },
+    {
+      "epoch": 0.16202,
+      "grad_norm": 1.257610807599335,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 16202
+    },
+    {
+      "epoch": 0.16203,
+      "grad_norm": 1.175552574679173,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 16203
+    },
+    {
+      "epoch": 0.16204,
+      "grad_norm": 1.4500182220732118,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 16204
+    },
+    {
+      "epoch": 0.16205,
+      "grad_norm": 1.1768076652887332,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 16205
+    },
+    {
+      "epoch": 0.16206,
+      "grad_norm": 1.3175082599762675,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 16206
+    },
+    {
+      "epoch": 0.16207,
+      "grad_norm": 1.083670859157658,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 16207
+    },
+    {
+      "epoch": 0.16208,
+      "grad_norm": 1.3829291622680255,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 16208
+    },
+    {
+      "epoch": 0.16209,
+      "grad_norm": 1.1075036833669838,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 16209
+    },
+    {
+      "epoch": 0.1621,
+      "grad_norm": 1.4023288738470112,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 16210
+    },
+    {
+      "epoch": 0.16211,
+      "grad_norm": 1.0568643946962788,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 16211
+    },
+    {
+      "epoch": 0.16212,
+      "grad_norm": 1.2822784907103308,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 16212
+    },
+    {
+      "epoch": 0.16213,
+      "grad_norm": 1.0865919739046765,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 16213
+    },
+    {
+      "epoch": 0.16214,
+      "grad_norm": 1.1462534560610254,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 16214
+    },
+    {
+      "epoch": 0.16215,
+      "grad_norm": 1.4350052523536598,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 16215
+    },
+    {
+      "epoch": 0.16216,
+      "grad_norm": 1.2013093101179828,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 16216
+    },
+    {
+      "epoch": 0.16217,
+      "grad_norm": 1.215098202740069,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 16217
+    },
+    {
+      "epoch": 0.16218,
+      "grad_norm": 1.5608825590690358,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 16218
+    },
+    {
+      "epoch": 0.16219,
+      "grad_norm": 0.979091370861085,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 16219
+    },
+    {
+      "epoch": 0.1622,
+      "grad_norm": 1.4947609668871131,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 16220
+    },
+    {
+      "epoch": 0.16221,
+      "grad_norm": 1.0998543006615162,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 16221
+    },
+    {
+      "epoch": 0.16222,
+      "grad_norm": 1.2523996094659415,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 16222
+    },
+    {
+      "epoch": 0.16223,
+      "grad_norm": 1.2217073500770703,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 16223
+    },
+    {
+      "epoch": 0.16224,
+      "grad_norm": 1.5546837574704633,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 16224
+    },
+    {
+      "epoch": 0.16225,
+      "grad_norm": 1.2099323804351048,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 16225
+    },
+    {
+      "epoch": 0.16226,
+      "grad_norm": 1.1681462870412578,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 16226
+    },
+    {
+      "epoch": 0.16227,
+      "grad_norm": 1.2745384192534517,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 16227
+    },
+    {
+      "epoch": 0.16228,
+      "grad_norm": 1.2910726751301218,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 16228
+    },
+    {
+      "epoch": 0.16229,
+      "grad_norm": 1.232591354216802,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 16229
+    },
+    {
+      "epoch": 0.1623,
+      "grad_norm": 1.1829997005740684,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 16230
+    },
+    {
+      "epoch": 0.16231,
+      "grad_norm": 1.3163228024010354,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 16231
+    },
+    {
+      "epoch": 0.16232,
+      "grad_norm": 1.233936463295609,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 16232
+    },
+    {
+      "epoch": 0.16233,
+      "grad_norm": 1.2132889495425785,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 16233
+    },
+    {
+      "epoch": 0.16234,
+      "grad_norm": 1.2822756762471483,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 16234
+    },
+    {
+      "epoch": 0.16235,
+      "grad_norm": 1.0788167153224901,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 16235
+    },
+    {
+      "epoch": 0.16236,
+      "grad_norm": 1.3068920944816198,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 16236
+    },
+    {
+      "epoch": 0.16237,
+      "grad_norm": 1.1490896028469753,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 16237
+    },
+    {
+      "epoch": 0.16238,
+      "grad_norm": 1.4329996245008865,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 16238
+    },
+    {
+      "epoch": 0.16239,
+      "grad_norm": 1.0738850110421332,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 16239
+    },
+    {
+      "epoch": 0.1624,
+      "grad_norm": 1.4199152589907686,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 16240
+    },
+    {
+      "epoch": 0.16241,
+      "grad_norm": 1.1707238634143304,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 16241
+    },
+    {
+      "epoch": 0.16242,
+      "grad_norm": 1.1452780655232648,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 16242
+    },
+    {
+      "epoch": 0.16243,
+      "grad_norm": 1.2250158805487876,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 16243
+    },
+    {
+      "epoch": 0.16244,
+      "grad_norm": 1.3050529176307324,
+      "learning_rate": 0.003,
+      "loss": 3.9826,
+      "step": 16244
+    },
+    {
+      "epoch": 0.16245,
+      "grad_norm": 1.2853826796033716,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 16245
+    },
+    {
+      "epoch": 0.16246,
+      "grad_norm": 1.425835266955398,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 16246
+    },
+    {
+      "epoch": 0.16247,
+      "grad_norm": 1.009245169525709,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 16247
+    },
+    {
+      "epoch": 0.16248,
+      "grad_norm": 1.6861590499758379,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 16248
+    },
+    {
+      "epoch": 0.16249,
+      "grad_norm": 1.0409671667743918,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 16249
+    },
+    {
+      "epoch": 0.1625,
+      "grad_norm": 1.4350539872300103,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 16250
+    },
+    {
+      "epoch": 0.16251,
+      "grad_norm": 1.1367477765992338,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 16251
+    },
+    {
+      "epoch": 0.16252,
+      "grad_norm": 1.2107808951722476,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 16252
+    },
+    {
+      "epoch": 0.16253,
+      "grad_norm": 1.1892489577790892,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 16253
+    },
+    {
+      "epoch": 0.16254,
+      "grad_norm": 1.2124577797786444,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 16254
+    },
+    {
+      "epoch": 0.16255,
+      "grad_norm": 1.208255102284434,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 16255
+    },
+    {
+      "epoch": 0.16256,
+      "grad_norm": 1.299753068132065,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 16256
+    },
+    {
+      "epoch": 0.16257,
+      "grad_norm": 1.17324629592306,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 16257
+    },
+    {
+      "epoch": 0.16258,
+      "grad_norm": 1.6489190676746355,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 16258
+    },
+    {
+      "epoch": 0.16259,
+      "grad_norm": 0.9070137256189884,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 16259
+    },
+    {
+      "epoch": 0.1626,
+      "grad_norm": 1.323713007881002,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 16260
+    },
+    {
+      "epoch": 0.16261,
+      "grad_norm": 1.2576953485729692,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 16261
+    },
+    {
+      "epoch": 0.16262,
+      "grad_norm": 1.0306207731281276,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 16262
+    },
+    {
+      "epoch": 0.16263,
+      "grad_norm": 1.3418643829455337,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 16263
+    },
+    {
+      "epoch": 0.16264,
+      "grad_norm": 1.0795537290135617,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 16264
+    },
+    {
+      "epoch": 0.16265,
+      "grad_norm": 1.3765411902940559,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 16265
+    },
+    {
+      "epoch": 0.16266,
+      "grad_norm": 1.1264219486711267,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 16266
+    },
+    {
+      "epoch": 0.16267,
+      "grad_norm": 1.5943807470861864,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 16267
+    },
+    {
+      "epoch": 0.16268,
+      "grad_norm": 1.1331519759746556,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 16268
+    },
+    {
+      "epoch": 0.16269,
+      "grad_norm": 1.4076888603028517,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 16269
+    },
+    {
+      "epoch": 0.1627,
+      "grad_norm": 1.1338980685085958,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 16270
+    },
+    {
+      "epoch": 0.16271,
+      "grad_norm": 1.2263541104881175,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 16271
+    },
+    {
+      "epoch": 0.16272,
+      "grad_norm": 1.2477132012700038,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 16272
+    },
+    {
+      "epoch": 0.16273,
+      "grad_norm": 1.4033482768081984,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 16273
+    },
+    {
+      "epoch": 0.16274,
+      "grad_norm": 1.2407092930558066,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 16274
+    },
+    {
+      "epoch": 0.16275,
+      "grad_norm": 1.1690149723541716,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 16275
+    },
+    {
+      "epoch": 0.16276,
+      "grad_norm": 1.4404826682453318,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 16276
+    },
+    {
+      "epoch": 0.16277,
+      "grad_norm": 1.0793605987424126,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 16277
+    },
+    {
+      "epoch": 0.16278,
+      "grad_norm": 1.2410935408130972,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 16278
+    },
+    {
+      "epoch": 0.16279,
+      "grad_norm": 1.1018110785157487,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 16279
+    },
+    {
+      "epoch": 0.1628,
+      "grad_norm": 1.4350516445394483,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 16280
+    },
+    {
+      "epoch": 0.16281,
+      "grad_norm": 1.040440143488395,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 16281
+    },
+    {
+      "epoch": 0.16282,
+      "grad_norm": 1.5725596187641915,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 16282
+    },
+    {
+      "epoch": 0.16283,
+      "grad_norm": 1.174488443518014,
+      "learning_rate": 0.003,
+      "loss": 3.9793,
+      "step": 16283
+    },
+    {
+      "epoch": 0.16284,
+      "grad_norm": 1.1418980978133457,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 16284
+    },
+    {
+      "epoch": 0.16285,
+      "grad_norm": 1.3176145082440558,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 16285
+    },
+    {
+      "epoch": 0.16286,
+      "grad_norm": 1.2729396117612566,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 16286
+    },
+    {
+      "epoch": 0.16287,
+      "grad_norm": 1.0582851913805245,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 16287
+    },
+    {
+      "epoch": 0.16288,
+      "grad_norm": 1.448962771931629,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 16288
+    },
+    {
+      "epoch": 0.16289,
+      "grad_norm": 1.0817148956757496,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 16289
+    },
+    {
+      "epoch": 0.1629,
+      "grad_norm": 1.2740098023804838,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 16290
+    },
+    {
+      "epoch": 0.16291,
+      "grad_norm": 1.1614133630841075,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 16291
+    },
+    {
+      "epoch": 0.16292,
+      "grad_norm": 1.236868613041996,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 16292
+    },
+    {
+      "epoch": 0.16293,
+      "grad_norm": 1.2869133677979343,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 16293
+    },
+    {
+      "epoch": 0.16294,
+      "grad_norm": 1.2907804379730377,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 16294
+    },
+    {
+      "epoch": 0.16295,
+      "grad_norm": 1.099536496169472,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 16295
+    },
+    {
+      "epoch": 0.16296,
+      "grad_norm": 1.3335936028645292,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 16296
+    },
+    {
+      "epoch": 0.16297,
+      "grad_norm": 1.1645557198097691,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 16297
+    },
+    {
+      "epoch": 0.16298,
+      "grad_norm": 1.3446573090864091,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 16298
+    },
+    {
+      "epoch": 0.16299,
+      "grad_norm": 1.277875127298129,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 16299
+    },
+    {
+      "epoch": 0.163,
+      "grad_norm": 1.360414138348981,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 16300
+    },
+    {
+      "epoch": 0.16301,
+      "grad_norm": 1.056558589312762,
+      "learning_rate": 0.003,
+      "loss": 3.9782,
+      "step": 16301
+    },
+    {
+      "epoch": 0.16302,
+      "grad_norm": 1.2740972430036706,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 16302
+    },
+    {
+      "epoch": 0.16303,
+      "grad_norm": 1.0433904292266207,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 16303
+    },
+    {
+      "epoch": 0.16304,
+      "grad_norm": 1.4690931174665078,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 16304
+    },
+    {
+      "epoch": 0.16305,
+      "grad_norm": 1.3585764808502565,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 16305
+    },
+    {
+      "epoch": 0.16306,
+      "grad_norm": 0.9738797029139665,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 16306
+    },
+    {
+      "epoch": 0.16307,
+      "grad_norm": 1.1886139683680965,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 16307
+    },
+    {
+      "epoch": 0.16308,
+      "grad_norm": 1.4949177477980182,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 16308
+    },
+    {
+      "epoch": 0.16309,
+      "grad_norm": 1.2034762337178353,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 16309
+    },
+    {
+      "epoch": 0.1631,
+      "grad_norm": 1.2990119340504764,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 16310
+    },
+    {
+      "epoch": 0.16311,
+      "grad_norm": 1.2898335711830429,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 16311
+    },
+    {
+      "epoch": 0.16312,
+      "grad_norm": 1.1909578646778416,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 16312
+    },
+    {
+      "epoch": 0.16313,
+      "grad_norm": 1.410969870509698,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 16313
+    },
+    {
+      "epoch": 0.16314,
+      "grad_norm": 1.1728459534717985,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 16314
+    },
+    {
+      "epoch": 0.16315,
+      "grad_norm": 1.4326695495326103,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 16315
+    },
+    {
+      "epoch": 0.16316,
+      "grad_norm": 1.200058845173753,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 16316
+    },
+    {
+      "epoch": 0.16317,
+      "grad_norm": 1.1029542964356904,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 16317
+    },
+    {
+      "epoch": 0.16318,
+      "grad_norm": 1.4904739266686984,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 16318
+    },
+    {
+      "epoch": 0.16319,
+      "grad_norm": 1.008823446422317,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 16319
+    },
+    {
+      "epoch": 0.1632,
+      "grad_norm": 1.5958122881545798,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 16320
+    },
+    {
+      "epoch": 0.16321,
+      "grad_norm": 1.0776684869138313,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 16321
+    },
+    {
+      "epoch": 0.16322,
+      "grad_norm": 1.3250262438870306,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 16322
+    },
+    {
+      "epoch": 0.16323,
+      "grad_norm": 1.2879418917363281,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 16323
+    },
+    {
+      "epoch": 0.16324,
+      "grad_norm": 1.277337742300279,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 16324
+    },
+    {
+      "epoch": 0.16325,
+      "grad_norm": 1.2046109700156538,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 16325
+    },
+    {
+      "epoch": 0.16326,
+      "grad_norm": 1.1594972035327338,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 16326
+    },
+    {
+      "epoch": 0.16327,
+      "grad_norm": 1.2993757871545841,
+      "learning_rate": 0.003,
+      "loss": 3.9811,
+      "step": 16327
+    },
+    {
+      "epoch": 0.16328,
+      "grad_norm": 0.9459441462719139,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 16328
+    },
+    {
+      "epoch": 0.16329,
+      "grad_norm": 1.089640361789377,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 16329
+    },
+    {
+      "epoch": 0.1633,
+      "grad_norm": 1.357347324378313,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 16330
+    },
+    {
+      "epoch": 0.16331,
+      "grad_norm": 1.3832348141460884,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 16331
+    },
+    {
+      "epoch": 0.16332,
+      "grad_norm": 1.1657345267078214,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 16332
+    },
+    {
+      "epoch": 0.16333,
+      "grad_norm": 1.2352142220832043,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 16333
+    },
+    {
+      "epoch": 0.16334,
+      "grad_norm": 1.3011743066663761,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 16334
+    },
+    {
+      "epoch": 0.16335,
+      "grad_norm": 1.301849855697704,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 16335
+    },
+    {
+      "epoch": 0.16336,
+      "grad_norm": 1.197578674404236,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 16336
+    },
+    {
+      "epoch": 0.16337,
+      "grad_norm": 1.2289120276716436,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 16337
+    },
+    {
+      "epoch": 0.16338,
+      "grad_norm": 1.0665177380441002,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 16338
+    },
+    {
+      "epoch": 0.16339,
+      "grad_norm": 1.4593108939078878,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 16339
+    },
+    {
+      "epoch": 0.1634,
+      "grad_norm": 0.9955358818303156,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 16340
+    },
+    {
+      "epoch": 0.16341,
+      "grad_norm": 1.1762072836442636,
+      "learning_rate": 0.003,
+      "loss": 3.9875,
+      "step": 16341
+    },
+    {
+      "epoch": 0.16342,
+      "grad_norm": 1.2875094008417542,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 16342
+    },
+    {
+      "epoch": 0.16343,
+      "grad_norm": 1.1438620403444335,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 16343
+    },
+    {
+      "epoch": 0.16344,
+      "grad_norm": 1.2411591665010742,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 16344
+    },
+    {
+      "epoch": 0.16345,
+      "grad_norm": 1.344220362883522,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 16345
+    },
+    {
+      "epoch": 0.16346,
+      "grad_norm": 1.1197276916624015,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 16346
+    },
+    {
+      "epoch": 0.16347,
+      "grad_norm": 1.2267385558994073,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 16347
+    },
+    {
+      "epoch": 0.16348,
+      "grad_norm": 1.4237698868352837,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 16348
+    },
+    {
+      "epoch": 0.16349,
+      "grad_norm": 1.2159721118278595,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 16349
+    },
+    {
+      "epoch": 0.1635,
+      "grad_norm": 1.2293083591735972,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 16350
+    },
+    {
+      "epoch": 0.16351,
+      "grad_norm": 1.2889322429236827,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 16351
+    },
+    {
+      "epoch": 0.16352,
+      "grad_norm": 1.3685760771446185,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 16352
+    },
+    {
+      "epoch": 0.16353,
+      "grad_norm": 1.222184218179719,
+      "learning_rate": 0.003,
+      "loss": 3.9792,
+      "step": 16353
+    },
+    {
+      "epoch": 0.16354,
+      "grad_norm": 1.4062042052693153,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 16354
+    },
+    {
+      "epoch": 0.16355,
+      "grad_norm": 1.1982614837664545,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 16355
+    },
+    {
+      "epoch": 0.16356,
+      "grad_norm": 1.0474864519503508,
+      "learning_rate": 0.003,
+      "loss": 3.9771,
+      "step": 16356
+    },
+    {
+      "epoch": 0.16357,
+      "grad_norm": 1.210625814816236,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 16357
+    },
+    {
+      "epoch": 0.16358,
+      "grad_norm": 1.1218492901639943,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 16358
+    },
+    {
+      "epoch": 0.16359,
+      "grad_norm": 1.3571042584631752,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 16359
+    },
+    {
+      "epoch": 0.1636,
+      "grad_norm": 1.097890712606949,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 16360
+    },
+    {
+      "epoch": 0.16361,
+      "grad_norm": 1.322708037059437,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 16361
+    },
+    {
+      "epoch": 0.16362,
+      "grad_norm": 1.465739171631752,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 16362
+    },
+    {
+      "epoch": 0.16363,
+      "grad_norm": 1.2289048539225673,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 16363
+    },
+    {
+      "epoch": 0.16364,
+      "grad_norm": 1.3255134052006838,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 16364
+    },
+    {
+      "epoch": 0.16365,
+      "grad_norm": 1.631364182785758,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 16365
+    },
+    {
+      "epoch": 0.16366,
+      "grad_norm": 1.0307348934901521,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 16366
+    },
+    {
+      "epoch": 0.16367,
+      "grad_norm": 1.5072865545072853,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 16367
+    },
+    {
+      "epoch": 0.16368,
+      "grad_norm": 1.0483240140762637,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 16368
+    },
+    {
+      "epoch": 0.16369,
+      "grad_norm": 1.5864667762571492,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 16369
+    },
+    {
+      "epoch": 0.1637,
+      "grad_norm": 0.9633118780727623,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 16370
+    },
+    {
+      "epoch": 0.16371,
+      "grad_norm": 1.3838750786501524,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 16371
+    },
+    {
+      "epoch": 0.16372,
+      "grad_norm": 1.0879613892858553,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 16372
+    },
+    {
+      "epoch": 0.16373,
+      "grad_norm": 1.4076474578927347,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 16373
+    },
+    {
+      "epoch": 0.16374,
+      "grad_norm": 1.2549560835076428,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 16374
+    },
+    {
+      "epoch": 0.16375,
+      "grad_norm": 1.2218421563642234,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 16375
+    },
+    {
+      "epoch": 0.16376,
+      "grad_norm": 1.1068188648679878,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 16376
+    },
+    {
+      "epoch": 0.16377,
+      "grad_norm": 1.356022831472547,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 16377
+    },
+    {
+      "epoch": 0.16378,
+      "grad_norm": 1.2627628500161356,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 16378
+    },
+    {
+      "epoch": 0.16379,
+      "grad_norm": 1.1543103781748223,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 16379
+    },
+    {
+      "epoch": 0.1638,
+      "grad_norm": 1.3650629405227102,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 16380
+    },
+    {
+      "epoch": 0.16381,
+      "grad_norm": 1.0863105705930893,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 16381
+    },
+    {
+      "epoch": 0.16382,
+      "grad_norm": 1.442162970226098,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 16382
+    },
+    {
+      "epoch": 0.16383,
+      "grad_norm": 1.2105949431390675,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 16383
+    },
+    {
+      "epoch": 0.16384,
+      "grad_norm": 1.2445309684091068,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 16384
+    },
+    {
+      "epoch": 0.16385,
+      "grad_norm": 1.2627835888797698,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 16385
+    },
+    {
+      "epoch": 0.16386,
+      "grad_norm": 1.2739873244060556,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 16386
+    },
+    {
+      "epoch": 0.16387,
+      "grad_norm": 1.136346771273126,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 16387
+    },
+    {
+      "epoch": 0.16388,
+      "grad_norm": 1.2313390681202687,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 16388
+    },
+    {
+      "epoch": 0.16389,
+      "grad_norm": 1.3446402546969176,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 16389
+    },
+    {
+      "epoch": 0.1639,
+      "grad_norm": 1.1748177586116015,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 16390
+    },
+    {
+      "epoch": 0.16391,
+      "grad_norm": 1.4496101096216325,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 16391
+    },
+    {
+      "epoch": 0.16392,
+      "grad_norm": 1.1392287821141556,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 16392
+    },
+    {
+      "epoch": 0.16393,
+      "grad_norm": 1.218511368003899,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 16393
+    },
+    {
+      "epoch": 0.16394,
+      "grad_norm": 1.146180596348099,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 16394
+    },
+    {
+      "epoch": 0.16395,
+      "grad_norm": 1.207602007972604,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 16395
+    },
+    {
+      "epoch": 0.16396,
+      "grad_norm": 1.249250791280474,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 16396
+    },
+    {
+      "epoch": 0.16397,
+      "grad_norm": 1.2974792736158025,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 16397
+    },
+    {
+      "epoch": 0.16398,
+      "grad_norm": 1.385400203784108,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 16398
+    },
+    {
+      "epoch": 0.16399,
+      "grad_norm": 0.9583698373756244,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 16399
+    },
+    {
+      "epoch": 0.164,
+      "grad_norm": 1.401565056887489,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 16400
+    },
+    {
+      "epoch": 0.16401,
+      "grad_norm": 1.1642142279096959,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 16401
+    },
+    {
+      "epoch": 0.16402,
+      "grad_norm": 1.2183166639913017,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 16402
+    },
+    {
+      "epoch": 0.16403,
+      "grad_norm": 1.2507916104231493,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 16403
+    },
+    {
+      "epoch": 0.16404,
+      "grad_norm": 1.14410261257001,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 16404
+    },
+    {
+      "epoch": 0.16405,
+      "grad_norm": 1.3882639592151966,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 16405
+    },
+    {
+      "epoch": 0.16406,
+      "grad_norm": 1.2800368116147272,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 16406
+    },
+    {
+      "epoch": 0.16407,
+      "grad_norm": 1.249683573714354,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 16407
+    },
+    {
+      "epoch": 0.16408,
+      "grad_norm": 1.1167375177906984,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 16408
+    },
+    {
+      "epoch": 0.16409,
+      "grad_norm": 1.263879094909315,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 16409
+    },
+    {
+      "epoch": 0.1641,
+      "grad_norm": 1.1124381509251915,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 16410
+    },
+    {
+      "epoch": 0.16411,
+      "grad_norm": 1.3936589885324306,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 16411
+    },
+    {
+      "epoch": 0.16412,
+      "grad_norm": 1.12451852583607,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 16412
+    },
+    {
+      "epoch": 0.16413,
+      "grad_norm": 1.460067412372667,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 16413
+    },
+    {
+      "epoch": 0.16414,
+      "grad_norm": 0.9747630232628195,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 16414
+    },
+    {
+      "epoch": 0.16415,
+      "grad_norm": 1.410573158295569,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 16415
+    },
+    {
+      "epoch": 0.16416,
+      "grad_norm": 1.1376270299664892,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 16416
+    },
+    {
+      "epoch": 0.16417,
+      "grad_norm": 1.2495604579613826,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 16417
+    },
+    {
+      "epoch": 0.16418,
+      "grad_norm": 1.3823509772498197,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 16418
+    },
+    {
+      "epoch": 0.16419,
+      "grad_norm": 1.2138146849977327,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 16419
+    },
+    {
+      "epoch": 0.1642,
+      "grad_norm": 1.1861034753425292,
+      "learning_rate": 0.003,
+      "loss": 4.0557,
+      "step": 16420
+    },
+    {
+      "epoch": 0.16421,
+      "grad_norm": 1.5003316384806735,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 16421
+    },
+    {
+      "epoch": 0.16422,
+      "grad_norm": 1.1063407146960826,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 16422
+    },
+    {
+      "epoch": 0.16423,
+      "grad_norm": 1.7022747407394883,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 16423
+    },
+    {
+      "epoch": 0.16424,
+      "grad_norm": 0.9854746270716073,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 16424
+    },
+    {
+      "epoch": 0.16425,
+      "grad_norm": 1.3461991942792693,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 16425
+    },
+    {
+      "epoch": 0.16426,
+      "grad_norm": 1.2611638827136702,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 16426
+    },
+    {
+      "epoch": 0.16427,
+      "grad_norm": 1.2392649592127025,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 16427
+    },
+    {
+      "epoch": 0.16428,
+      "grad_norm": 1.1600141364720165,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 16428
+    },
+    {
+      "epoch": 0.16429,
+      "grad_norm": 1.365743175043391,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 16429
+    },
+    {
+      "epoch": 0.1643,
+      "grad_norm": 1.2341725722983907,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 16430
+    },
+    {
+      "epoch": 0.16431,
+      "grad_norm": 1.2664336922254367,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 16431
+    },
+    {
+      "epoch": 0.16432,
+      "grad_norm": 1.162134504903983,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 16432
+    },
+    {
+      "epoch": 0.16433,
+      "grad_norm": 1.1890031067087414,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 16433
+    },
+    {
+      "epoch": 0.16434,
+      "grad_norm": 1.268343217918924,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 16434
+    },
+    {
+      "epoch": 0.16435,
+      "grad_norm": 0.9735238227671814,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 16435
+    },
+    {
+      "epoch": 0.16436,
+      "grad_norm": 1.2501414862931537,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 16436
+    },
+    {
+      "epoch": 0.16437,
+      "grad_norm": 1.1033374229738238,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 16437
+    },
+    {
+      "epoch": 0.16438,
+      "grad_norm": 1.2530212734335278,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 16438
+    },
+    {
+      "epoch": 0.16439,
+      "grad_norm": 1.1415596092894056,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 16439
+    },
+    {
+      "epoch": 0.1644,
+      "grad_norm": 1.258427707531266,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 16440
+    },
+    {
+      "epoch": 0.16441,
+      "grad_norm": 1.3504692944034622,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 16441
+    },
+    {
+      "epoch": 0.16442,
+      "grad_norm": 1.0471200830711485,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 16442
+    },
+    {
+      "epoch": 0.16443,
+      "grad_norm": 1.3586376355519827,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 16443
+    },
+    {
+      "epoch": 0.16444,
+      "grad_norm": 1.2291363527030847,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 16444
+    },
+    {
+      "epoch": 0.16445,
+      "grad_norm": 1.2462129480970447,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 16445
+    },
+    {
+      "epoch": 0.16446,
+      "grad_norm": 1.081352439140012,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 16446
+    },
+    {
+      "epoch": 0.16447,
+      "grad_norm": 1.671540047008696,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 16447
+    },
+    {
+      "epoch": 0.16448,
+      "grad_norm": 1.3643640246569684,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 16448
+    },
+    {
+      "epoch": 0.16449,
+      "grad_norm": 1.3453266447923604,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 16449
+    },
+    {
+      "epoch": 0.1645,
+      "grad_norm": 1.20426945020544,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 16450
+    },
+    {
+      "epoch": 0.16451,
+      "grad_norm": 1.2960461000701875,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 16451
+    },
+    {
+      "epoch": 0.16452,
+      "grad_norm": 1.1882929055492957,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 16452
+    },
+    {
+      "epoch": 0.16453,
+      "grad_norm": 1.1786688126968292,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 16453
+    },
+    {
+      "epoch": 0.16454,
+      "grad_norm": 1.3048838114337977,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 16454
+    },
+    {
+      "epoch": 0.16455,
+      "grad_norm": 1.3505352827862118,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 16455
+    },
+    {
+      "epoch": 0.16456,
+      "grad_norm": 1.2457370763871507,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 16456
+    },
+    {
+      "epoch": 0.16457,
+      "grad_norm": 1.1464300008417747,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 16457
+    },
+    {
+      "epoch": 0.16458,
+      "grad_norm": 1.268097477324259,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 16458
+    },
+    {
+      "epoch": 0.16459,
+      "grad_norm": 1.4368587052408366,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 16459
+    },
+    {
+      "epoch": 0.1646,
+      "grad_norm": 1.2233996992334433,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 16460
+    },
+    {
+      "epoch": 0.16461,
+      "grad_norm": 1.1727679642417794,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 16461
+    },
+    {
+      "epoch": 0.16462,
+      "grad_norm": 1.3165209833868021,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 16462
+    },
+    {
+      "epoch": 0.16463,
+      "grad_norm": 1.2273269610329756,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 16463
+    },
+    {
+      "epoch": 0.16464,
+      "grad_norm": 1.2464394092353464,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 16464
+    },
+    {
+      "epoch": 0.16465,
+      "grad_norm": 1.3187530477988323,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 16465
+    },
+    {
+      "epoch": 0.16466,
+      "grad_norm": 1.1829192865232832,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 16466
+    },
+    {
+      "epoch": 0.16467,
+      "grad_norm": 1.1799295434341366,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 16467
+    },
+    {
+      "epoch": 0.16468,
+      "grad_norm": 1.1506766706819855,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 16468
+    },
+    {
+      "epoch": 0.16469,
+      "grad_norm": 1.415062921477318,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 16469
+    },
+    {
+      "epoch": 0.1647,
+      "grad_norm": 1.472580170523846,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 16470
+    },
+    {
+      "epoch": 0.16471,
+      "grad_norm": 1.2214043721676051,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 16471
+    },
+    {
+      "epoch": 0.16472,
+      "grad_norm": 1.2147205884895964,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 16472
+    },
+    {
+      "epoch": 0.16473,
+      "grad_norm": 1.1776590050316018,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 16473
+    },
+    {
+      "epoch": 0.16474,
+      "grad_norm": 1.2389656898506585,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 16474
+    },
+    {
+      "epoch": 0.16475,
+      "grad_norm": 1.176666306433308,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 16475
+    },
+    {
+      "epoch": 0.16476,
+      "grad_norm": 1.3147913418496244,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 16476
+    },
+    {
+      "epoch": 0.16477,
+      "grad_norm": 1.1587640003981066,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 16477
+    },
+    {
+      "epoch": 0.16478,
+      "grad_norm": 1.273223333688864,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 16478
+    },
+    {
+      "epoch": 0.16479,
+      "grad_norm": 1.2291506382173074,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 16479
+    },
+    {
+      "epoch": 0.1648,
+      "grad_norm": 1.2322478623329385,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 16480
+    },
+    {
+      "epoch": 0.16481,
+      "grad_norm": 1.2174792441948241,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 16481
+    },
+    {
+      "epoch": 0.16482,
+      "grad_norm": 1.4378399317818813,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 16482
+    },
+    {
+      "epoch": 0.16483,
+      "grad_norm": 1.1371049837514948,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 16483
+    },
+    {
+      "epoch": 0.16484,
+      "grad_norm": 1.3173930471537727,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 16484
+    },
+    {
+      "epoch": 0.16485,
+      "grad_norm": 1.1046764475930426,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 16485
+    },
+    {
+      "epoch": 0.16486,
+      "grad_norm": 1.239045577924919,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 16486
+    },
+    {
+      "epoch": 0.16487,
+      "grad_norm": 1.4686884132531135,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 16487
+    },
+    {
+      "epoch": 0.16488,
+      "grad_norm": 1.0800050156006988,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 16488
+    },
+    {
+      "epoch": 0.16489,
+      "grad_norm": 1.4134323871973382,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 16489
+    },
+    {
+      "epoch": 0.1649,
+      "grad_norm": 0.9682334441192832,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 16490
+    },
+    {
+      "epoch": 0.16491,
+      "grad_norm": 1.4200881733091222,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 16491
+    },
+    {
+      "epoch": 0.16492,
+      "grad_norm": 1.061585186787891,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 16492
+    },
+    {
+      "epoch": 0.16493,
+      "grad_norm": 1.7032178016253496,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 16493
+    },
+    {
+      "epoch": 0.16494,
+      "grad_norm": 1.1359795011220917,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 16494
+    },
+    {
+      "epoch": 0.16495,
+      "grad_norm": 1.2483787020055124,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 16495
+    },
+    {
+      "epoch": 0.16496,
+      "grad_norm": 1.2910680574674094,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 16496
+    },
+    {
+      "epoch": 0.16497,
+      "grad_norm": 1.1862278580892176,
+      "learning_rate": 0.003,
+      "loss": 3.9745,
+      "step": 16497
+    },
+    {
+      "epoch": 0.16498,
+      "grad_norm": 1.4112033344203716,
+      "learning_rate": 0.003,
+      "loss": 3.9688,
+      "step": 16498
+    },
+    {
+      "epoch": 0.16499,
+      "grad_norm": 1.1855255839885497,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 16499
+    },
+    {
+      "epoch": 0.165,
+      "grad_norm": 1.4129010225149548,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 16500
+    },
+    {
+      "epoch": 0.16501,
+      "grad_norm": 0.9507245813429811,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 16501
+    },
+    {
+      "epoch": 0.16502,
+      "grad_norm": 1.3347143352886812,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 16502
+    },
+    {
+      "epoch": 0.16503,
+      "grad_norm": 1.0347509498235556,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 16503
+    },
+    {
+      "epoch": 0.16504,
+      "grad_norm": 1.423740368524998,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 16504
+    },
+    {
+      "epoch": 0.16505,
+      "grad_norm": 1.122722153433454,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 16505
+    },
+    {
+      "epoch": 0.16506,
+      "grad_norm": 1.294244218597045,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 16506
+    },
+    {
+      "epoch": 0.16507,
+      "grad_norm": 1.1443270649860016,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 16507
+    },
+    {
+      "epoch": 0.16508,
+      "grad_norm": 1.442432469846176,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 16508
+    },
+    {
+      "epoch": 0.16509,
+      "grad_norm": 1.0582122500043307,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 16509
+    },
+    {
+      "epoch": 0.1651,
+      "grad_norm": 1.6553330678007394,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 16510
+    },
+    {
+      "epoch": 0.16511,
+      "grad_norm": 1.2460014109083422,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 16511
+    },
+    {
+      "epoch": 0.16512,
+      "grad_norm": 1.4193887292079423,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 16512
+    },
+    {
+      "epoch": 0.16513,
+      "grad_norm": 0.9212556927907253,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 16513
+    },
+    {
+      "epoch": 0.16514,
+      "grad_norm": 1.2699018818052847,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 16514
+    },
+    {
+      "epoch": 0.16515,
+      "grad_norm": 1.106930909545978,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 16515
+    },
+    {
+      "epoch": 0.16516,
+      "grad_norm": 1.2259637741423628,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 16516
+    },
+    {
+      "epoch": 0.16517,
+      "grad_norm": 1.15824728988598,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 16517
+    },
+    {
+      "epoch": 0.16518,
+      "grad_norm": 1.3487176359210198,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 16518
+    },
+    {
+      "epoch": 0.16519,
+      "grad_norm": 1.0376620933785712,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 16519
+    },
+    {
+      "epoch": 0.1652,
+      "grad_norm": 1.3406151681112863,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 16520
+    },
+    {
+      "epoch": 0.16521,
+      "grad_norm": 1.237189948735632,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 16521
+    },
+    {
+      "epoch": 0.16522,
+      "grad_norm": 1.5870827299658874,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 16522
+    },
+    {
+      "epoch": 0.16523,
+      "grad_norm": 0.996710035102378,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 16523
+    },
+    {
+      "epoch": 0.16524,
+      "grad_norm": 1.3138563369561216,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 16524
+    },
+    {
+      "epoch": 0.16525,
+      "grad_norm": 0.9443266091429152,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 16525
+    },
+    {
+      "epoch": 0.16526,
+      "grad_norm": 1.3288852608092152,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 16526
+    },
+    {
+      "epoch": 0.16527,
+      "grad_norm": 1.1161681754946187,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 16527
+    },
+    {
+      "epoch": 0.16528,
+      "grad_norm": 1.2676851233683877,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 16528
+    },
+    {
+      "epoch": 0.16529,
+      "grad_norm": 1.5278489919144347,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 16529
+    },
+    {
+      "epoch": 0.1653,
+      "grad_norm": 1.3333473641523625,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 16530
+    },
+    {
+      "epoch": 0.16531,
+      "grad_norm": 1.0450767874846136,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 16531
+    },
+    {
+      "epoch": 0.16532,
+      "grad_norm": 1.276311673917285,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 16532
+    },
+    {
+      "epoch": 0.16533,
+      "grad_norm": 1.311758439221919,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 16533
+    },
+    {
+      "epoch": 0.16534,
+      "grad_norm": 1.251935222227861,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 16534
+    },
+    {
+      "epoch": 0.16535,
+      "grad_norm": 1.4219610470844877,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 16535
+    },
+    {
+      "epoch": 0.16536,
+      "grad_norm": 1.098314666492021,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 16536
+    },
+    {
+      "epoch": 0.16537,
+      "grad_norm": 1.2524466159575347,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 16537
+    },
+    {
+      "epoch": 0.16538,
+      "grad_norm": 1.1051257760069162,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 16538
+    },
+    {
+      "epoch": 0.16539,
+      "grad_norm": 1.4507028107549453,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 16539
+    },
+    {
+      "epoch": 0.1654,
+      "grad_norm": 1.2015000906469193,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 16540
+    },
+    {
+      "epoch": 0.16541,
+      "grad_norm": 1.5193683138530925,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 16541
+    },
+    {
+      "epoch": 0.16542,
+      "grad_norm": 0.9145876723834636,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 16542
+    },
+    {
+      "epoch": 0.16543,
+      "grad_norm": 1.1502530819129524,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 16543
+    },
+    {
+      "epoch": 0.16544,
+      "grad_norm": 1.3293026862371053,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 16544
+    },
+    {
+      "epoch": 0.16545,
+      "grad_norm": 1.0563595407032713,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 16545
+    },
+    {
+      "epoch": 0.16546,
+      "grad_norm": 1.4948166835840087,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 16546
+    },
+    {
+      "epoch": 0.16547,
+      "grad_norm": 0.9426733011174305,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 16547
+    },
+    {
+      "epoch": 0.16548,
+      "grad_norm": 1.3107429457937985,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 16548
+    },
+    {
+      "epoch": 0.16549,
+      "grad_norm": 1.3094077637021049,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 16549
+    },
+    {
+      "epoch": 0.1655,
+      "grad_norm": 1.073261489594188,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 16550
+    },
+    {
+      "epoch": 0.16551,
+      "grad_norm": 1.354807864246254,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 16551
+    },
+    {
+      "epoch": 0.16552,
+      "grad_norm": 1.0112854922645667,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 16552
+    },
+    {
+      "epoch": 0.16553,
+      "grad_norm": 1.59252432958257,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 16553
+    },
+    {
+      "epoch": 0.16554,
+      "grad_norm": 1.2108947227394369,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 16554
+    },
+    {
+      "epoch": 0.16555,
+      "grad_norm": 1.1690231164195901,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 16555
+    },
+    {
+      "epoch": 0.16556,
+      "grad_norm": 1.4264690518283973,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 16556
+    },
+    {
+      "epoch": 0.16557,
+      "grad_norm": 1.124470612180664,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 16557
+    },
+    {
+      "epoch": 0.16558,
+      "grad_norm": 1.4096201695258213,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 16558
+    },
+    {
+      "epoch": 0.16559,
+      "grad_norm": 1.1374025708558277,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 16559
+    },
+    {
+      "epoch": 0.1656,
+      "grad_norm": 1.4429274806112555,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 16560
+    },
+    {
+      "epoch": 0.16561,
+      "grad_norm": 1.3442276249320984,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 16561
+    },
+    {
+      "epoch": 0.16562,
+      "grad_norm": 1.480263293504643,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 16562
+    },
+    {
+      "epoch": 0.16563,
+      "grad_norm": 1.1138476514363325,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 16563
+    },
+    {
+      "epoch": 0.16564,
+      "grad_norm": 1.3431345573500961,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 16564
+    },
+    {
+      "epoch": 0.16565,
+      "grad_norm": 1.4352240332625645,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 16565
+    },
+    {
+      "epoch": 0.16566,
+      "grad_norm": 0.9837148136132222,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 16566
+    },
+    {
+      "epoch": 0.16567,
+      "grad_norm": 1.299108251594754,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 16567
+    },
+    {
+      "epoch": 0.16568,
+      "grad_norm": 1.1682095581834722,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 16568
+    },
+    {
+      "epoch": 0.16569,
+      "grad_norm": 1.1768012839694058,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 16569
+    },
+    {
+      "epoch": 0.1657,
+      "grad_norm": 1.1031963756185752,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 16570
+    },
+    {
+      "epoch": 0.16571,
+      "grad_norm": 1.3860222274258607,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 16571
+    },
+    {
+      "epoch": 0.16572,
+      "grad_norm": 1.3969337609331909,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 16572
+    },
+    {
+      "epoch": 0.16573,
+      "grad_norm": 1.3063052614852653,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 16573
+    },
+    {
+      "epoch": 0.16574,
+      "grad_norm": 1.0464741451960757,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 16574
+    },
+    {
+      "epoch": 0.16575,
+      "grad_norm": 1.337615973734632,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 16575
+    },
+    {
+      "epoch": 0.16576,
+      "grad_norm": 1.1618907603870425,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 16576
+    },
+    {
+      "epoch": 0.16577,
+      "grad_norm": 1.3200946599609433,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 16577
+    },
+    {
+      "epoch": 0.16578,
+      "grad_norm": 1.0433429753006371,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 16578
+    },
+    {
+      "epoch": 0.16579,
+      "grad_norm": 1.662063329619664,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 16579
+    },
+    {
+      "epoch": 0.1658,
+      "grad_norm": 0.755601682306504,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 16580
+    },
+    {
+      "epoch": 0.16581,
+      "grad_norm": 1.0226406775980381,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 16581
+    },
+    {
+      "epoch": 0.16582,
+      "grad_norm": 1.3870489878903887,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 16582
+    },
+    {
+      "epoch": 0.16583,
+      "grad_norm": 1.2087741859482437,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 16583
+    },
+    {
+      "epoch": 0.16584,
+      "grad_norm": 1.2152029624363838,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 16584
+    },
+    {
+      "epoch": 0.16585,
+      "grad_norm": 1.3252901206130747,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 16585
+    },
+    {
+      "epoch": 0.16586,
+      "grad_norm": 1.1542687790472583,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 16586
+    },
+    {
+      "epoch": 0.16587,
+      "grad_norm": 1.3388694163458916,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 16587
+    },
+    {
+      "epoch": 0.16588,
+      "grad_norm": 1.3452678444742063,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 16588
+    },
+    {
+      "epoch": 0.16589,
+      "grad_norm": 1.1417025425422698,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 16589
+    },
+    {
+      "epoch": 0.1659,
+      "grad_norm": 1.1863637880560758,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 16590
+    },
+    {
+      "epoch": 0.16591,
+      "grad_norm": 1.4649363325974047,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 16591
+    },
+    {
+      "epoch": 0.16592,
+      "grad_norm": 1.1515827924904152,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 16592
+    },
+    {
+      "epoch": 0.16593,
+      "grad_norm": 1.2388456396257521,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 16593
+    },
+    {
+      "epoch": 0.16594,
+      "grad_norm": 1.232311058547632,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 16594
+    },
+    {
+      "epoch": 0.16595,
+      "grad_norm": 1.0999208653989847,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 16595
+    },
+    {
+      "epoch": 0.16596,
+      "grad_norm": 1.255290749615242,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 16596
+    },
+    {
+      "epoch": 0.16597,
+      "grad_norm": 1.2889959301516716,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 16597
+    },
+    {
+      "epoch": 0.16598,
+      "grad_norm": 1.3703577736789996,
+      "learning_rate": 0.003,
+      "loss": 3.9769,
+      "step": 16598
+    },
+    {
+      "epoch": 0.16599,
+      "grad_norm": 1.2711906709860699,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 16599
+    },
+    {
+      "epoch": 0.166,
+      "grad_norm": 1.2141273117003566,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 16600
+    },
+    {
+      "epoch": 0.16601,
+      "grad_norm": 1.256337088331127,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 16601
+    },
+    {
+      "epoch": 0.16602,
+      "grad_norm": 1.2603912995095456,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 16602
+    },
+    {
+      "epoch": 0.16603,
+      "grad_norm": 1.2456616186407852,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 16603
+    },
+    {
+      "epoch": 0.16604,
+      "grad_norm": 1.415050378787449,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 16604
+    },
+    {
+      "epoch": 0.16605,
+      "grad_norm": 1.2547809965459051,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 16605
+    },
+    {
+      "epoch": 0.16606,
+      "grad_norm": 1.3211577802082288,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 16606
+    },
+    {
+      "epoch": 0.16607,
+      "grad_norm": 1.3281001478920962,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 16607
+    },
+    {
+      "epoch": 0.16608,
+      "grad_norm": 1.0475477901467427,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 16608
+    },
+    {
+      "epoch": 0.16609,
+      "grad_norm": 1.6382789949343959,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 16609
+    },
+    {
+      "epoch": 0.1661,
+      "grad_norm": 0.994687220833452,
+      "learning_rate": 0.003,
+      "loss": 3.9787,
+      "step": 16610
+    },
+    {
+      "epoch": 0.16611,
+      "grad_norm": 1.5715788227614695,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 16611
+    },
+    {
+      "epoch": 0.16612,
+      "grad_norm": 1.0605613837990646,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 16612
+    },
+    {
+      "epoch": 0.16613,
+      "grad_norm": 1.2526526695670748,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 16613
+    },
+    {
+      "epoch": 0.16614,
+      "grad_norm": 1.4338328500378312,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 16614
+    },
+    {
+      "epoch": 0.16615,
+      "grad_norm": 1.0087894789702216,
+      "learning_rate": 0.003,
+      "loss": 3.9763,
+      "step": 16615
+    },
+    {
+      "epoch": 0.16616,
+      "grad_norm": 1.3697634543246178,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 16616
+    },
+    {
+      "epoch": 0.16617,
+      "grad_norm": 1.017535543821651,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 16617
+    },
+    {
+      "epoch": 0.16618,
+      "grad_norm": 1.3581523497537333,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 16618
+    },
+    {
+      "epoch": 0.16619,
+      "grad_norm": 1.1592790899911563,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 16619
+    },
+    {
+      "epoch": 0.1662,
+      "grad_norm": 1.3154600386381228,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 16620
+    },
+    {
+      "epoch": 0.16621,
+      "grad_norm": 1.4131419506857579,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 16621
+    },
+    {
+      "epoch": 0.16622,
+      "grad_norm": 1.120139961517329,
+      "learning_rate": 0.003,
+      "loss": 3.978,
+      "step": 16622
+    },
+    {
+      "epoch": 0.16623,
+      "grad_norm": 1.299129512003256,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 16623
+    },
+    {
+      "epoch": 0.16624,
+      "grad_norm": 1.0853732855861533,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 16624
+    },
+    {
+      "epoch": 0.16625,
+      "grad_norm": 1.2353134895344438,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 16625
+    },
+    {
+      "epoch": 0.16626,
+      "grad_norm": 1.0471620820993004,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 16626
+    },
+    {
+      "epoch": 0.16627,
+      "grad_norm": 1.5102483519391996,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 16627
+    },
+    {
+      "epoch": 0.16628,
+      "grad_norm": 1.3309274545091607,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 16628
+    },
+    {
+      "epoch": 0.16629,
+      "grad_norm": 1.3090873273971353,
+      "learning_rate": 0.003,
+      "loss": 4.055,
+      "step": 16629
+    },
+    {
+      "epoch": 0.1663,
+      "grad_norm": 1.3004116327526425,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 16630
+    },
+    {
+      "epoch": 0.16631,
+      "grad_norm": 1.0823361796499542,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 16631
+    },
+    {
+      "epoch": 0.16632,
+      "grad_norm": 1.2930715193846416,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 16632
+    },
+    {
+      "epoch": 0.16633,
+      "grad_norm": 1.424491690435542,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 16633
+    },
+    {
+      "epoch": 0.16634,
+      "grad_norm": 1.3834142236076787,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 16634
+    },
+    {
+      "epoch": 0.16635,
+      "grad_norm": 1.1052937514110142,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 16635
+    },
+    {
+      "epoch": 0.16636,
+      "grad_norm": 1.2612133046875074,
+      "learning_rate": 0.003,
+      "loss": 3.9637,
+      "step": 16636
+    },
+    {
+      "epoch": 0.16637,
+      "grad_norm": 1.0980924188880004,
+      "learning_rate": 0.003,
+      "loss": 3.9734,
+      "step": 16637
+    },
+    {
+      "epoch": 0.16638,
+      "grad_norm": 1.2526679697383083,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 16638
+    },
+    {
+      "epoch": 0.16639,
+      "grad_norm": 1.452666129554067,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 16639
+    },
+    {
+      "epoch": 0.1664,
+      "grad_norm": 1.2623830226090003,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 16640
+    },
+    {
+      "epoch": 0.16641,
+      "grad_norm": 1.279212006893314,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 16641
+    },
+    {
+      "epoch": 0.16642,
+      "grad_norm": 1.247644798505221,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 16642
+    },
+    {
+      "epoch": 0.16643,
+      "grad_norm": 1.3550614752198638,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 16643
+    },
+    {
+      "epoch": 0.16644,
+      "grad_norm": 1.1944425316800222,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 16644
+    },
+    {
+      "epoch": 0.16645,
+      "grad_norm": 1.3047621724565892,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 16645
+    },
+    {
+      "epoch": 0.16646,
+      "grad_norm": 1.1236723102236015,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 16646
+    },
+    {
+      "epoch": 0.16647,
+      "grad_norm": 1.3102114573804153,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 16647
+    },
+    {
+      "epoch": 0.16648,
+      "grad_norm": 1.3853012906961333,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 16648
+    },
+    {
+      "epoch": 0.16649,
+      "grad_norm": 1.204186130236693,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 16649
+    },
+    {
+      "epoch": 0.1665,
+      "grad_norm": 1.2667647582380555,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 16650
+    },
+    {
+      "epoch": 0.16651,
+      "grad_norm": 1.1257390464916401,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 16651
+    },
+    {
+      "epoch": 0.16652,
+      "grad_norm": 1.186721837147446,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 16652
+    },
+    {
+      "epoch": 0.16653,
+      "grad_norm": 1.6462828654146497,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 16653
+    },
+    {
+      "epoch": 0.16654,
+      "grad_norm": 1.0411480294282478,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 16654
+    },
+    {
+      "epoch": 0.16655,
+      "grad_norm": 1.4339898853354354,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 16655
+    },
+    {
+      "epoch": 0.16656,
+      "grad_norm": 1.1739844303369085,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 16656
+    },
+    {
+      "epoch": 0.16657,
+      "grad_norm": 1.317060669414251,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 16657
+    },
+    {
+      "epoch": 0.16658,
+      "grad_norm": 1.126918738365564,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 16658
+    },
+    {
+      "epoch": 0.16659,
+      "grad_norm": 1.3895895177863988,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 16659
+    },
+    {
+      "epoch": 0.1666,
+      "grad_norm": 1.096387786516118,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 16660
+    },
+    {
+      "epoch": 0.16661,
+      "grad_norm": 1.1898746539870575,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 16661
+    },
+    {
+      "epoch": 0.16662,
+      "grad_norm": 1.2605716405733234,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 16662
+    },
+    {
+      "epoch": 0.16663,
+      "grad_norm": 1.462237163983018,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 16663
+    },
+    {
+      "epoch": 0.16664,
+      "grad_norm": 1.3892155212314823,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 16664
+    },
+    {
+      "epoch": 0.16665,
+      "grad_norm": 1.340379892839075,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 16665
+    },
+    {
+      "epoch": 0.16666,
+      "grad_norm": 1.1584760250723478,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 16666
+    },
+    {
+      "epoch": 0.16667,
+      "grad_norm": 1.1991756211519113,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 16667
+    },
+    {
+      "epoch": 0.16668,
+      "grad_norm": 1.3689035096811366,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 16668
+    },
+    {
+      "epoch": 0.16669,
+      "grad_norm": 1.1232920358044445,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 16669
+    },
+    {
+      "epoch": 0.1667,
+      "grad_norm": 1.281615109502724,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 16670
+    },
+    {
+      "epoch": 0.16671,
+      "grad_norm": 1.1907936743828131,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 16671
+    },
+    {
+      "epoch": 0.16672,
+      "grad_norm": 1.4193075434273048,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 16672
+    },
+    {
+      "epoch": 0.16673,
+      "grad_norm": 1.0848940734064485,
+      "learning_rate": 0.003,
+      "loss": 3.9763,
+      "step": 16673
+    },
+    {
+      "epoch": 0.16674,
+      "grad_norm": 1.5030624748585042,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 16674
+    },
+    {
+      "epoch": 0.16675,
+      "grad_norm": 1.0582258078957265,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 16675
+    },
+    {
+      "epoch": 0.16676,
+      "grad_norm": 1.4933079963776932,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 16676
+    },
+    {
+      "epoch": 0.16677,
+      "grad_norm": 1.0339320217688897,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 16677
+    },
+    {
+      "epoch": 0.16678,
+      "grad_norm": 1.2165205219680335,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 16678
+    },
+    {
+      "epoch": 0.16679,
+      "grad_norm": 1.2456381676389232,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 16679
+    },
+    {
+      "epoch": 0.1668,
+      "grad_norm": 1.1630329563160737,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 16680
+    },
+    {
+      "epoch": 0.16681,
+      "grad_norm": 1.3020999973173273,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 16681
+    },
+    {
+      "epoch": 0.16682,
+      "grad_norm": 1.0243414220786775,
+      "learning_rate": 0.003,
+      "loss": 3.9819,
+      "step": 16682
+    },
+    {
+      "epoch": 0.16683,
+      "grad_norm": 1.302135134365088,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 16683
+    },
+    {
+      "epoch": 0.16684,
+      "grad_norm": 1.1169560404571857,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 16684
+    },
+    {
+      "epoch": 0.16685,
+      "grad_norm": 1.3256062969865654,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 16685
+    },
+    {
+      "epoch": 0.16686,
+      "grad_norm": 1.4763477134276877,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 16686
+    },
+    {
+      "epoch": 0.16687,
+      "grad_norm": 1.1346500313299308,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 16687
+    },
+    {
+      "epoch": 0.16688,
+      "grad_norm": 1.3655555799191739,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 16688
+    },
+    {
+      "epoch": 0.16689,
+      "grad_norm": 1.101426064051465,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 16689
+    },
+    {
+      "epoch": 0.1669,
+      "grad_norm": 1.3874465778661247,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 16690
+    },
+    {
+      "epoch": 0.16691,
+      "grad_norm": 1.0666013667829752,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 16691
+    },
+    {
+      "epoch": 0.16692,
+      "grad_norm": 1.294662136639945,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 16692
+    },
+    {
+      "epoch": 0.16693,
+      "grad_norm": 1.1594748536272677,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 16693
+    },
+    {
+      "epoch": 0.16694,
+      "grad_norm": 1.474360847049748,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 16694
+    },
+    {
+      "epoch": 0.16695,
+      "grad_norm": 1.11696559619686,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 16695
+    },
+    {
+      "epoch": 0.16696,
+      "grad_norm": 1.2671759382966818,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 16696
+    },
+    {
+      "epoch": 0.16697,
+      "grad_norm": 1.1882774659173905,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 16697
+    },
+    {
+      "epoch": 0.16698,
+      "grad_norm": 1.326883885143455,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 16698
+    },
+    {
+      "epoch": 0.16699,
+      "grad_norm": 1.0980149148747356,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 16699
+    },
+    {
+      "epoch": 0.167,
+      "grad_norm": 1.4241149247181872,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 16700
+    },
+    {
+      "epoch": 0.16701,
+      "grad_norm": 1.0844540501513107,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 16701
+    },
+    {
+      "epoch": 0.16702,
+      "grad_norm": 1.5909286938176141,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 16702
+    },
+    {
+      "epoch": 0.16703,
+      "grad_norm": 1.1187009224227151,
+      "learning_rate": 0.003,
+      "loss": 3.98,
+      "step": 16703
+    },
+    {
+      "epoch": 0.16704,
+      "grad_norm": 1.3361911693628177,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 16704
+    },
+    {
+      "epoch": 0.16705,
+      "grad_norm": 1.1892842822492835,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 16705
+    },
+    {
+      "epoch": 0.16706,
+      "grad_norm": 1.3129804202129807,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 16706
+    },
+    {
+      "epoch": 0.16707,
+      "grad_norm": 1.2040177966034948,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 16707
+    },
+    {
+      "epoch": 0.16708,
+      "grad_norm": 1.3184689484922762,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 16708
+    },
+    {
+      "epoch": 0.16709,
+      "grad_norm": 1.1710476876758964,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 16709
+    },
+    {
+      "epoch": 0.1671,
+      "grad_norm": 1.482230330878662,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 16710
+    },
+    {
+      "epoch": 0.16711,
+      "grad_norm": 1.2272387432789482,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 16711
+    },
+    {
+      "epoch": 0.16712,
+      "grad_norm": 1.0283774893512463,
+      "learning_rate": 0.003,
+      "loss": 3.9649,
+      "step": 16712
+    },
+    {
+      "epoch": 0.16713,
+      "grad_norm": 1.2397209300256942,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 16713
+    },
+    {
+      "epoch": 0.16714,
+      "grad_norm": 1.1778879363882633,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 16714
+    },
+    {
+      "epoch": 0.16715,
+      "grad_norm": 1.4236584642291445,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 16715
+    },
+    {
+      "epoch": 0.16716,
+      "grad_norm": 1.2435713003061704,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 16716
+    },
+    {
+      "epoch": 0.16717,
+      "grad_norm": 1.4285517568570933,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 16717
+    },
+    {
+      "epoch": 0.16718,
+      "grad_norm": 1.165974077606312,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 16718
+    },
+    {
+      "epoch": 0.16719,
+      "grad_norm": 1.2330315988108786,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 16719
+    },
+    {
+      "epoch": 0.1672,
+      "grad_norm": 1.1376708053144504,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 16720
+    },
+    {
+      "epoch": 0.16721,
+      "grad_norm": 1.4924178352557733,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 16721
+    },
+    {
+      "epoch": 0.16722,
+      "grad_norm": 1.1736703334094785,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 16722
+    },
+    {
+      "epoch": 0.16723,
+      "grad_norm": 1.4078887935894222,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 16723
+    },
+    {
+      "epoch": 0.16724,
+      "grad_norm": 1.0143058288575488,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 16724
+    },
+    {
+      "epoch": 0.16725,
+      "grad_norm": 1.5416517889434647,
+      "learning_rate": 0.003,
+      "loss": 4.049,
+      "step": 16725
+    },
+    {
+      "epoch": 0.16726,
+      "grad_norm": 1.277683984553053,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 16726
+    },
+    {
+      "epoch": 0.16727,
+      "grad_norm": 1.2943184716801348,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 16727
+    },
+    {
+      "epoch": 0.16728,
+      "grad_norm": 1.295832553200161,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 16728
+    },
+    {
+      "epoch": 0.16729,
+      "grad_norm": 1.0854698151175544,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 16729
+    },
+    {
+      "epoch": 0.1673,
+      "grad_norm": 1.2479114469022485,
+      "learning_rate": 0.003,
+      "loss": 4.0609,
+      "step": 16730
+    },
+    {
+      "epoch": 0.16731,
+      "grad_norm": 1.2512266369895562,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 16731
+    },
+    {
+      "epoch": 0.16732,
+      "grad_norm": 1.3643541839221898,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 16732
+    },
+    {
+      "epoch": 0.16733,
+      "grad_norm": 1.2287892480363976,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 16733
+    },
+    {
+      "epoch": 0.16734,
+      "grad_norm": 1.225151030439417,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 16734
+    },
+    {
+      "epoch": 0.16735,
+      "grad_norm": 1.385564809223566,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 16735
+    },
+    {
+      "epoch": 0.16736,
+      "grad_norm": 1.195776919613441,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 16736
+    },
+    {
+      "epoch": 0.16737,
+      "grad_norm": 1.3884190285371376,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 16737
+    },
+    {
+      "epoch": 0.16738,
+      "grad_norm": 1.3054771728390893,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 16738
+    },
+    {
+      "epoch": 0.16739,
+      "grad_norm": 1.46399542404328,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 16739
+    },
+    {
+      "epoch": 0.1674,
+      "grad_norm": 1.0718812275028755,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 16740
+    },
+    {
+      "epoch": 0.16741,
+      "grad_norm": 1.1745625466912526,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 16741
+    },
+    {
+      "epoch": 0.16742,
+      "grad_norm": 1.2075247790914365,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 16742
+    },
+    {
+      "epoch": 0.16743,
+      "grad_norm": 1.4867521718033896,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 16743
+    },
+    {
+      "epoch": 0.16744,
+      "grad_norm": 1.065952806673697,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 16744
+    },
+    {
+      "epoch": 0.16745,
+      "grad_norm": 1.3972026360932996,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 16745
+    },
+    {
+      "epoch": 0.16746,
+      "grad_norm": 0.9031064911642256,
+      "learning_rate": 0.003,
+      "loss": 4.0369,
+      "step": 16746
+    },
+    {
+      "epoch": 0.16747,
+      "grad_norm": 1.2482872551552535,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 16747
+    },
+    {
+      "epoch": 0.16748,
+      "grad_norm": 1.0793757898817635,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 16748
+    },
+    {
+      "epoch": 0.16749,
+      "grad_norm": 1.277504587284281,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 16749
+    },
+    {
+      "epoch": 0.1675,
+      "grad_norm": 1.2997001547277538,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 16750
+    },
+    {
+      "epoch": 0.16751,
+      "grad_norm": 1.2662310141494997,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 16751
+    },
+    {
+      "epoch": 0.16752,
+      "grad_norm": 1.2715291357652396,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 16752
+    },
+    {
+      "epoch": 0.16753,
+      "grad_norm": 1.0903498454308935,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 16753
+    },
+    {
+      "epoch": 0.16754,
+      "grad_norm": 1.262414695269733,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 16754
+    },
+    {
+      "epoch": 0.16755,
+      "grad_norm": 1.2417549278845725,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 16755
+    },
+    {
+      "epoch": 0.16756,
+      "grad_norm": 1.2435194125675262,
+      "learning_rate": 0.003,
+      "loss": 3.976,
+      "step": 16756
+    },
+    {
+      "epoch": 0.16757,
+      "grad_norm": 1.146735908434143,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 16757
+    },
+    {
+      "epoch": 0.16758,
+      "grad_norm": 1.2602637595972412,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 16758
+    },
+    {
+      "epoch": 0.16759,
+      "grad_norm": 1.3309230491987023,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 16759
+    },
+    {
+      "epoch": 0.1676,
+      "grad_norm": 0.989748089374222,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 16760
+    },
+    {
+      "epoch": 0.16761,
+      "grad_norm": 1.313745719423959,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 16761
+    },
+    {
+      "epoch": 0.16762,
+      "grad_norm": 1.1794458031105426,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 16762
+    },
+    {
+      "epoch": 0.16763,
+      "grad_norm": 1.5006657592196686,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 16763
+    },
+    {
+      "epoch": 0.16764,
+      "grad_norm": 1.0854875211858523,
+      "learning_rate": 0.003,
+      "loss": 3.9827,
+      "step": 16764
+    },
+    {
+      "epoch": 0.16765,
+      "grad_norm": 1.2799489508034012,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 16765
+    },
+    {
+      "epoch": 0.16766,
+      "grad_norm": 1.5385809064040605,
+      "learning_rate": 0.003,
+      "loss": 4.0575,
+      "step": 16766
+    },
+    {
+      "epoch": 0.16767,
+      "grad_norm": 0.9871757576558676,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 16767
+    },
+    {
+      "epoch": 0.16768,
+      "grad_norm": 1.517071967155638,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 16768
+    },
+    {
+      "epoch": 0.16769,
+      "grad_norm": 1.1359527581158686,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 16769
+    },
+    {
+      "epoch": 0.1677,
+      "grad_norm": 1.5020135445753076,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 16770
+    },
+    {
+      "epoch": 0.16771,
+      "grad_norm": 0.9101721493263855,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 16771
+    },
+    {
+      "epoch": 0.16772,
+      "grad_norm": 1.0621089618420638,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 16772
+    },
+    {
+      "epoch": 0.16773,
+      "grad_norm": 1.5564499611496503,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 16773
+    },
+    {
+      "epoch": 0.16774,
+      "grad_norm": 1.155688872689453,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 16774
+    },
+    {
+      "epoch": 0.16775,
+      "grad_norm": 1.3845128789778514,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 16775
+    },
+    {
+      "epoch": 0.16776,
+      "grad_norm": 1.360241301172709,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 16776
+    },
+    {
+      "epoch": 0.16777,
+      "grad_norm": 1.4479593937643158,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 16777
+    },
+    {
+      "epoch": 0.16778,
+      "grad_norm": 1.2458349740823786,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 16778
+    },
+    {
+      "epoch": 0.16779,
+      "grad_norm": 1.259420554355854,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 16779
+    },
+    {
+      "epoch": 0.1678,
+      "grad_norm": 1.34903629188709,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 16780
+    },
+    {
+      "epoch": 0.16781,
+      "grad_norm": 1.286025725331968,
+      "learning_rate": 0.003,
+      "loss": 4.0606,
+      "step": 16781
+    },
+    {
+      "epoch": 0.16782,
+      "grad_norm": 1.2291030746001255,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 16782
+    },
+    {
+      "epoch": 0.16783,
+      "grad_norm": 1.0875291955559694,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 16783
+    },
+    {
+      "epoch": 0.16784,
+      "grad_norm": 1.3159002512388083,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 16784
+    },
+    {
+      "epoch": 0.16785,
+      "grad_norm": 1.317527244791968,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 16785
+    },
+    {
+      "epoch": 0.16786,
+      "grad_norm": 1.4311664908854542,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 16786
+    },
+    {
+      "epoch": 0.16787,
+      "grad_norm": 1.1657445541783047,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 16787
+    },
+    {
+      "epoch": 0.16788,
+      "grad_norm": 1.1218297979981173,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 16788
+    },
+    {
+      "epoch": 0.16789,
+      "grad_norm": 1.2100040863439783,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 16789
+    },
+    {
+      "epoch": 0.1679,
+      "grad_norm": 1.2551242998126244,
+      "learning_rate": 0.003,
+      "loss": 3.9854,
+      "step": 16790
+    },
+    {
+      "epoch": 0.16791,
+      "grad_norm": 1.1932540463897132,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 16791
+    },
+    {
+      "epoch": 0.16792,
+      "grad_norm": 1.2391779990331424,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 16792
+    },
+    {
+      "epoch": 0.16793,
+      "grad_norm": 1.0922719286288864,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 16793
+    },
+    {
+      "epoch": 0.16794,
+      "grad_norm": 1.27160864630517,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 16794
+    },
+    {
+      "epoch": 0.16795,
+      "grad_norm": 1.1913975588204553,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 16795
+    },
+    {
+      "epoch": 0.16796,
+      "grad_norm": 1.243028596016473,
+      "learning_rate": 0.003,
+      "loss": 3.9788,
+      "step": 16796
+    },
+    {
+      "epoch": 0.16797,
+      "grad_norm": 1.0818591658569379,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 16797
+    },
+    {
+      "epoch": 0.16798,
+      "grad_norm": 1.5692826015776473,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 16798
+    },
+    {
+      "epoch": 0.16799,
+      "grad_norm": 1.3033905320702153,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 16799
+    },
+    {
+      "epoch": 0.168,
+      "grad_norm": 1.270301971839668,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 16800
+    },
+    {
+      "epoch": 0.16801,
+      "grad_norm": 1.2606117354223052,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 16801
+    },
+    {
+      "epoch": 0.16802,
+      "grad_norm": 1.1290385301338544,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 16802
+    },
+    {
+      "epoch": 0.16803,
+      "grad_norm": 1.3409426616809526,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 16803
+    },
+    {
+      "epoch": 0.16804,
+      "grad_norm": 1.1059978263354993,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 16804
+    },
+    {
+      "epoch": 0.16805,
+      "grad_norm": 1.3947787113601666,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 16805
+    },
+    {
+      "epoch": 0.16806,
+      "grad_norm": 0.9963101775114525,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 16806
+    },
+    {
+      "epoch": 0.16807,
+      "grad_norm": 1.5634406150316704,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 16807
+    },
+    {
+      "epoch": 0.16808,
+      "grad_norm": 1.2678649753363247,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 16808
+    },
+    {
+      "epoch": 0.16809,
+      "grad_norm": 1.11171865107745,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 16809
+    },
+    {
+      "epoch": 0.1681,
+      "grad_norm": 1.2064696063825564,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 16810
+    },
+    {
+      "epoch": 0.16811,
+      "grad_norm": 1.2685610617105252,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 16811
+    },
+    {
+      "epoch": 0.16812,
+      "grad_norm": 1.242413676772281,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 16812
+    },
+    {
+      "epoch": 0.16813,
+      "grad_norm": 1.1500362337080925,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 16813
+    },
+    {
+      "epoch": 0.16814,
+      "grad_norm": 1.3192617974063514,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 16814
+    },
+    {
+      "epoch": 0.16815,
+      "grad_norm": 1.230514206708865,
+      "learning_rate": 0.003,
+      "loss": 4.0541,
+      "step": 16815
+    },
+    {
+      "epoch": 0.16816,
+      "grad_norm": 1.4023838619816673,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 16816
+    },
+    {
+      "epoch": 0.16817,
+      "grad_norm": 1.09004068408355,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 16817
+    },
+    {
+      "epoch": 0.16818,
+      "grad_norm": 1.2087619042288684,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 16818
+    },
+    {
+      "epoch": 0.16819,
+      "grad_norm": 1.0940419536959587,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 16819
+    },
+    {
+      "epoch": 0.1682,
+      "grad_norm": 1.356788098447559,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 16820
+    },
+    {
+      "epoch": 0.16821,
+      "grad_norm": 1.1814260113034483,
+      "learning_rate": 0.003,
+      "loss": 3.9808,
+      "step": 16821
+    },
+    {
+      "epoch": 0.16822,
+      "grad_norm": 1.083041113799058,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 16822
+    },
+    {
+      "epoch": 0.16823,
+      "grad_norm": 1.3845447100976924,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 16823
+    },
+    {
+      "epoch": 0.16824,
+      "grad_norm": 1.083888232716743,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 16824
+    },
+    {
+      "epoch": 0.16825,
+      "grad_norm": 1.5547189734994098,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 16825
+    },
+    {
+      "epoch": 0.16826,
+      "grad_norm": 1.1505415556402658,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 16826
+    },
+    {
+      "epoch": 0.16827,
+      "grad_norm": 1.516656079533665,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 16827
+    },
+    {
+      "epoch": 0.16828,
+      "grad_norm": 1.0255557130551507,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 16828
+    },
+    {
+      "epoch": 0.16829,
+      "grad_norm": 1.3575562934723178,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 16829
+    },
+    {
+      "epoch": 0.1683,
+      "grad_norm": 1.1988117222499541,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 16830
+    },
+    {
+      "epoch": 0.16831,
+      "grad_norm": 1.4874532719939633,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 16831
+    },
+    {
+      "epoch": 0.16832,
+      "grad_norm": 1.1065575081516017,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 16832
+    },
+    {
+      "epoch": 0.16833,
+      "grad_norm": 1.361580708209208,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 16833
+    },
+    {
+      "epoch": 0.16834,
+      "grad_norm": 1.078641506329546,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 16834
+    },
+    {
+      "epoch": 0.16835,
+      "grad_norm": 1.2498711449421978,
+      "learning_rate": 0.003,
+      "loss": 3.9655,
+      "step": 16835
+    },
+    {
+      "epoch": 0.16836,
+      "grad_norm": 1.2074394083708289,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 16836
+    },
+    {
+      "epoch": 0.16837,
+      "grad_norm": 1.4289220670388905,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 16837
+    },
+    {
+      "epoch": 0.16838,
+      "grad_norm": 1.1106440851025856,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 16838
+    },
+    {
+      "epoch": 0.16839,
+      "grad_norm": 1.4335390418677294,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 16839
+    },
+    {
+      "epoch": 0.1684,
+      "grad_norm": 1.0488019557253343,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 16840
+    },
+    {
+      "epoch": 0.16841,
+      "grad_norm": 1.3042148326398717,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 16841
+    },
+    {
+      "epoch": 0.16842,
+      "grad_norm": 1.4603238658870208,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 16842
+    },
+    {
+      "epoch": 0.16843,
+      "grad_norm": 1.060291926461328,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 16843
+    },
+    {
+      "epoch": 0.16844,
+      "grad_norm": 1.405258245277971,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 16844
+    },
+    {
+      "epoch": 0.16845,
+      "grad_norm": 1.1607577698893636,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 16845
+    },
+    {
+      "epoch": 0.16846,
+      "grad_norm": 1.4783227381620572,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 16846
+    },
+    {
+      "epoch": 0.16847,
+      "grad_norm": 1.1648476762438198,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 16847
+    },
+    {
+      "epoch": 0.16848,
+      "grad_norm": 1.421633129727707,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 16848
+    },
+    {
+      "epoch": 0.16849,
+      "grad_norm": 1.3381995418471722,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 16849
+    },
+    {
+      "epoch": 0.1685,
+      "grad_norm": 1.0817309204605705,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 16850
+    },
+    {
+      "epoch": 0.16851,
+      "grad_norm": 1.2280418994634874,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 16851
+    },
+    {
+      "epoch": 0.16852,
+      "grad_norm": 1.1433143202647051,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 16852
+    },
+    {
+      "epoch": 0.16853,
+      "grad_norm": 1.374223930656413,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 16853
+    },
+    {
+      "epoch": 0.16854,
+      "grad_norm": 1.1483790544696622,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 16854
+    },
+    {
+      "epoch": 0.16855,
+      "grad_norm": 1.3528775235819215,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 16855
+    },
+    {
+      "epoch": 0.16856,
+      "grad_norm": 1.177944326397307,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 16856
+    },
+    {
+      "epoch": 0.16857,
+      "grad_norm": 1.308171100592795,
+      "learning_rate": 0.003,
+      "loss": 3.9745,
+      "step": 16857
+    },
+    {
+      "epoch": 0.16858,
+      "grad_norm": 1.1529652930812384,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 16858
+    },
+    {
+      "epoch": 0.16859,
+      "grad_norm": 1.2731702112081706,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 16859
+    },
+    {
+      "epoch": 0.1686,
+      "grad_norm": 1.31032159477305,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 16860
+    },
+    {
+      "epoch": 0.16861,
+      "grad_norm": 1.2036047048130094,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 16861
+    },
+    {
+      "epoch": 0.16862,
+      "grad_norm": 1.339073288144215,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 16862
+    },
+    {
+      "epoch": 0.16863,
+      "grad_norm": 1.3109035788782717,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 16863
+    },
+    {
+      "epoch": 0.16864,
+      "grad_norm": 1.304758696665521,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 16864
+    },
+    {
+      "epoch": 0.16865,
+      "grad_norm": 1.2074240967263565,
+      "learning_rate": 0.003,
+      "loss": 3.9844,
+      "step": 16865
+    },
+    {
+      "epoch": 0.16866,
+      "grad_norm": 1.1209272913654764,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 16866
+    },
+    {
+      "epoch": 0.16867,
+      "grad_norm": 1.4518110921716785,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 16867
+    },
+    {
+      "epoch": 0.16868,
+      "grad_norm": 1.0774290770428272,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 16868
+    },
+    {
+      "epoch": 0.16869,
+      "grad_norm": 1.312204142314037,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 16869
+    },
+    {
+      "epoch": 0.1687,
+      "grad_norm": 1.1416925160063203,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 16870
+    },
+    {
+      "epoch": 0.16871,
+      "grad_norm": 1.5112168839952957,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 16871
+    },
+    {
+      "epoch": 0.16872,
+      "grad_norm": 1.1897944524848985,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 16872
+    },
+    {
+      "epoch": 0.16873,
+      "grad_norm": 1.446510519618927,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 16873
+    },
+    {
+      "epoch": 0.16874,
+      "grad_norm": 1.396135666890526,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 16874
+    },
+    {
+      "epoch": 0.16875,
+      "grad_norm": 1.433971567663403,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 16875
+    },
+    {
+      "epoch": 0.16876,
+      "grad_norm": 1.0044949337191382,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 16876
+    },
+    {
+      "epoch": 0.16877,
+      "grad_norm": 1.4266779672159549,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 16877
+    },
+    {
+      "epoch": 0.16878,
+      "grad_norm": 1.29967644899772,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 16878
+    },
+    {
+      "epoch": 0.16879,
+      "grad_norm": 1.120701139574282,
+      "learning_rate": 0.003,
+      "loss": 3.9812,
+      "step": 16879
+    },
+    {
+      "epoch": 0.1688,
+      "grad_norm": 1.2225522998809213,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 16880
+    },
+    {
+      "epoch": 0.16881,
+      "grad_norm": 1.2109024989470603,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 16881
+    },
+    {
+      "epoch": 0.16882,
+      "grad_norm": 1.4632096241684103,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 16882
+    },
+    {
+      "epoch": 0.16883,
+      "grad_norm": 1.2485484778905063,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 16883
+    },
+    {
+      "epoch": 0.16884,
+      "grad_norm": 1.1186898652467716,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 16884
+    },
+    {
+      "epoch": 0.16885,
+      "grad_norm": 1.24133162504021,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 16885
+    },
+    {
+      "epoch": 0.16886,
+      "grad_norm": 1.3050455876523348,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 16886
+    },
+    {
+      "epoch": 0.16887,
+      "grad_norm": 1.362781051037715,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 16887
+    },
+    {
+      "epoch": 0.16888,
+      "grad_norm": 1.3892189347450552,
+      "learning_rate": 0.003,
+      "loss": 4.0423,
+      "step": 16888
+    },
+    {
+      "epoch": 0.16889,
+      "grad_norm": 1.1407710712129773,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 16889
+    },
+    {
+      "epoch": 0.1689,
+      "grad_norm": 1.206006069080571,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 16890
+    },
+    {
+      "epoch": 0.16891,
+      "grad_norm": 1.1342045777811507,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 16891
+    },
+    {
+      "epoch": 0.16892,
+      "grad_norm": 1.2619875409492707,
+      "learning_rate": 0.003,
+      "loss": 3.9845,
+      "step": 16892
+    },
+    {
+      "epoch": 0.16893,
+      "grad_norm": 1.2205092992378006,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 16893
+    },
+    {
+      "epoch": 0.16894,
+      "grad_norm": 1.244931242041419,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 16894
+    },
+    {
+      "epoch": 0.16895,
+      "grad_norm": 1.2651379871481943,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 16895
+    },
+    {
+      "epoch": 0.16896,
+      "grad_norm": 1.2366124296381513,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 16896
+    },
+    {
+      "epoch": 0.16897,
+      "grad_norm": 1.0590663890418193,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 16897
+    },
+    {
+      "epoch": 0.16898,
+      "grad_norm": 1.3479594975330957,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 16898
+    },
+    {
+      "epoch": 0.16899,
+      "grad_norm": 1.110658655779992,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 16899
+    },
+    {
+      "epoch": 0.169,
+      "grad_norm": 1.2805799269670264,
+      "learning_rate": 0.003,
+      "loss": 3.9875,
+      "step": 16900
+    },
+    {
+      "epoch": 0.16901,
+      "grad_norm": 1.1028430432312288,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 16901
+    },
+    {
+      "epoch": 0.16902,
+      "grad_norm": 1.5531230378318743,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 16902
+    },
+    {
+      "epoch": 0.16903,
+      "grad_norm": 1.112848851912049,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 16903
+    },
+    {
+      "epoch": 0.16904,
+      "grad_norm": 1.3508659986287932,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 16904
+    },
+    {
+      "epoch": 0.16905,
+      "grad_norm": 1.3904795812482704,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 16905
+    },
+    {
+      "epoch": 0.16906,
+      "grad_norm": 1.3812560919461059,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 16906
+    },
+    {
+      "epoch": 0.16907,
+      "grad_norm": 1.2833538687555124,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 16907
+    },
+    {
+      "epoch": 0.16908,
+      "grad_norm": 1.2529061133847317,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 16908
+    },
+    {
+      "epoch": 0.16909,
+      "grad_norm": 1.1306501723012512,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 16909
+    },
+    {
+      "epoch": 0.1691,
+      "grad_norm": 1.145615530866832,
+      "learning_rate": 0.003,
+      "loss": 3.9635,
+      "step": 16910
+    },
+    {
+      "epoch": 0.16911,
+      "grad_norm": 1.257679014582806,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 16911
+    },
+    {
+      "epoch": 0.16912,
+      "grad_norm": 1.1571753241793494,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 16912
+    },
+    {
+      "epoch": 0.16913,
+      "grad_norm": 1.2552940210106678,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 16913
+    },
+    {
+      "epoch": 0.16914,
+      "grad_norm": 1.384337884789972,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 16914
+    },
+    {
+      "epoch": 0.16915,
+      "grad_norm": 1.2197693395854996,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 16915
+    },
+    {
+      "epoch": 0.16916,
+      "grad_norm": 1.2880217635284237,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 16916
+    },
+    {
+      "epoch": 0.16917,
+      "grad_norm": 1.3384699793351602,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 16917
+    },
+    {
+      "epoch": 0.16918,
+      "grad_norm": 1.2616668365937014,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 16918
+    },
+    {
+      "epoch": 0.16919,
+      "grad_norm": 1.3222589677064205,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 16919
+    },
+    {
+      "epoch": 0.1692,
+      "grad_norm": 1.178864504225822,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 16920
+    },
+    {
+      "epoch": 0.16921,
+      "grad_norm": 1.2981216950894976,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 16921
+    },
+    {
+      "epoch": 0.16922,
+      "grad_norm": 1.5438283634887369,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 16922
+    },
+    {
+      "epoch": 0.16923,
+      "grad_norm": 1.152152700697969,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 16923
+    },
+    {
+      "epoch": 0.16924,
+      "grad_norm": 1.2619466292301122,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 16924
+    },
+    {
+      "epoch": 0.16925,
+      "grad_norm": 1.2677460366996953,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 16925
+    },
+    {
+      "epoch": 0.16926,
+      "grad_norm": 1.0390056602589441,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 16926
+    },
+    {
+      "epoch": 0.16927,
+      "grad_norm": 1.3574529434547262,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 16927
+    },
+    {
+      "epoch": 0.16928,
+      "grad_norm": 1.0928639897909367,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 16928
+    },
+    {
+      "epoch": 0.16929,
+      "grad_norm": 1.3934850961846503,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 16929
+    },
+    {
+      "epoch": 0.1693,
+      "grad_norm": 1.285281464957926,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 16930
+    },
+    {
+      "epoch": 0.16931,
+      "grad_norm": 1.2455841870968274,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 16931
+    },
+    {
+      "epoch": 0.16932,
+      "grad_norm": 1.3341301746229626,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 16932
+    },
+    {
+      "epoch": 0.16933,
+      "grad_norm": 1.1781447497056439,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 16933
+    },
+    {
+      "epoch": 0.16934,
+      "grad_norm": 1.2428939057922601,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 16934
+    },
+    {
+      "epoch": 0.16935,
+      "grad_norm": 1.2483314679414697,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 16935
+    },
+    {
+      "epoch": 0.16936,
+      "grad_norm": 1.314712637897631,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 16936
+    },
+    {
+      "epoch": 0.16937,
+      "grad_norm": 1.0492346671508817,
+      "learning_rate": 0.003,
+      "loss": 3.9845,
+      "step": 16937
+    },
+    {
+      "epoch": 0.16938,
+      "grad_norm": 1.3815854838892725,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 16938
+    },
+    {
+      "epoch": 0.16939,
+      "grad_norm": 1.2392019868274606,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 16939
+    },
+    {
+      "epoch": 0.1694,
+      "grad_norm": 1.5056024070904612,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 16940
+    },
+    {
+      "epoch": 0.16941,
+      "grad_norm": 1.2201284842797908,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 16941
+    },
+    {
+      "epoch": 0.16942,
+      "grad_norm": 1.3929057023313056,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 16942
+    },
+    {
+      "epoch": 0.16943,
+      "grad_norm": 1.2383122178551467,
+      "learning_rate": 0.003,
+      "loss": 3.9776,
+      "step": 16943
+    },
+    {
+      "epoch": 0.16944,
+      "grad_norm": 1.101071882798595,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 16944
+    },
+    {
+      "epoch": 0.16945,
+      "grad_norm": 1.4592890179827827,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 16945
+    },
+    {
+      "epoch": 0.16946,
+      "grad_norm": 1.176261448397904,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 16946
+    },
+    {
+      "epoch": 0.16947,
+      "grad_norm": 1.1938508900306732,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 16947
+    },
+    {
+      "epoch": 0.16948,
+      "grad_norm": 1.2417706899331502,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 16948
+    },
+    {
+      "epoch": 0.16949,
+      "grad_norm": 1.119059964256359,
+      "learning_rate": 0.003,
+      "loss": 3.9685,
+      "step": 16949
+    },
+    {
+      "epoch": 0.1695,
+      "grad_norm": 1.1741223880758327,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 16950
+    },
+    {
+      "epoch": 0.16951,
+      "grad_norm": 1.3448040518697508,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 16951
+    },
+    {
+      "epoch": 0.16952,
+      "grad_norm": 1.0488312214388276,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 16952
+    },
+    {
+      "epoch": 0.16953,
+      "grad_norm": 1.4062247086633848,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 16953
+    },
+    {
+      "epoch": 0.16954,
+      "grad_norm": 1.2078394336426208,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 16954
+    },
+    {
+      "epoch": 0.16955,
+      "grad_norm": 1.2369747127025712,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 16955
+    },
+    {
+      "epoch": 0.16956,
+      "grad_norm": 1.2197357412428835,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 16956
+    },
+    {
+      "epoch": 0.16957,
+      "grad_norm": 1.4609469443691683,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 16957
+    },
+    {
+      "epoch": 0.16958,
+      "grad_norm": 0.9557802790163199,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 16958
+    },
+    {
+      "epoch": 0.16959,
+      "grad_norm": 1.24108490074871,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 16959
+    },
+    {
+      "epoch": 0.1696,
+      "grad_norm": 1.4668437065833444,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 16960
+    },
+    {
+      "epoch": 0.16961,
+      "grad_norm": 1.1743810113446638,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 16961
+    },
+    {
+      "epoch": 0.16962,
+      "grad_norm": 1.4978543000574625,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 16962
+    },
+    {
+      "epoch": 0.16963,
+      "grad_norm": 1.0906658346730245,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 16963
+    },
+    {
+      "epoch": 0.16964,
+      "grad_norm": 1.4468427659755054,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 16964
+    },
+    {
+      "epoch": 0.16965,
+      "grad_norm": 1.27416762346165,
+      "learning_rate": 0.003,
+      "loss": 3.988,
+      "step": 16965
+    },
+    {
+      "epoch": 0.16966,
+      "grad_norm": 1.331194166487556,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 16966
+    },
+    {
+      "epoch": 0.16967,
+      "grad_norm": 1.2625953737664501,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 16967
+    },
+    {
+      "epoch": 0.16968,
+      "grad_norm": 1.080363195070113,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 16968
+    },
+    {
+      "epoch": 0.16969,
+      "grad_norm": 1.5447983936107297,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 16969
+    },
+    {
+      "epoch": 0.1697,
+      "grad_norm": 1.2006316216642772,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 16970
+    },
+    {
+      "epoch": 0.16971,
+      "grad_norm": 1.3860194333678044,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 16971
+    },
+    {
+      "epoch": 0.16972,
+      "grad_norm": 1.2485380657128105,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 16972
+    },
+    {
+      "epoch": 0.16973,
+      "grad_norm": 1.1469982508816605,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 16973
+    },
+    {
+      "epoch": 0.16974,
+      "grad_norm": 1.2098045638378874,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 16974
+    },
+    {
+      "epoch": 0.16975,
+      "grad_norm": 1.2238529275774659,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 16975
+    },
+    {
+      "epoch": 0.16976,
+      "grad_norm": 1.0835686022976578,
+      "learning_rate": 0.003,
+      "loss": 3.9671,
+      "step": 16976
+    },
+    {
+      "epoch": 0.16977,
+      "grad_norm": 1.5463250511671391,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 16977
+    },
+    {
+      "epoch": 0.16978,
+      "grad_norm": 1.156525371142893,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 16978
+    },
+    {
+      "epoch": 0.16979,
+      "grad_norm": 1.4567991308225594,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 16979
+    },
+    {
+      "epoch": 0.1698,
+      "grad_norm": 1.0656381636333407,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 16980
+    },
+    {
+      "epoch": 0.16981,
+      "grad_norm": 1.3190589658406147,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 16981
+    },
+    {
+      "epoch": 0.16982,
+      "grad_norm": 1.11372006687511,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 16982
+    },
+    {
+      "epoch": 0.16983,
+      "grad_norm": 1.363892577636926,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 16983
+    },
+    {
+      "epoch": 0.16984,
+      "grad_norm": 1.2820694465744142,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 16984
+    },
+    {
+      "epoch": 0.16985,
+      "grad_norm": 1.151936227728988,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 16985
+    },
+    {
+      "epoch": 0.16986,
+      "grad_norm": 1.2019003952022753,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 16986
+    },
+    {
+      "epoch": 0.16987,
+      "grad_norm": 1.2916031244486534,
+      "learning_rate": 0.003,
+      "loss": 4.0513,
+      "step": 16987
+    },
+    {
+      "epoch": 0.16988,
+      "grad_norm": 1.3963732146288383,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 16988
+    },
+    {
+      "epoch": 0.16989,
+      "grad_norm": 1.2685891835699157,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 16989
+    },
+    {
+      "epoch": 0.1699,
+      "grad_norm": 1.2951575610634358,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 16990
+    },
+    {
+      "epoch": 0.16991,
+      "grad_norm": 1.2921496230383875,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 16991
+    },
+    {
+      "epoch": 0.16992,
+      "grad_norm": 1.1691614288218513,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 16992
+    },
+    {
+      "epoch": 0.16993,
+      "grad_norm": 1.3505549787048354,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 16993
+    },
+    {
+      "epoch": 0.16994,
+      "grad_norm": 1.234698424787448,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 16994
+    },
+    {
+      "epoch": 0.16995,
+      "grad_norm": 1.1635611711772356,
+      "learning_rate": 0.003,
+      "loss": 3.9766,
+      "step": 16995
+    },
+    {
+      "epoch": 0.16996,
+      "grad_norm": 1.3932235075072121,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 16996
+    },
+    {
+      "epoch": 0.16997,
+      "grad_norm": 1.2329785004756628,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 16997
+    },
+    {
+      "epoch": 0.16998,
+      "grad_norm": 1.6130306233485228,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 16998
+    },
+    {
+      "epoch": 0.16999,
+      "grad_norm": 0.987298280415107,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 16999
+    },
+    {
+      "epoch": 0.17,
+      "grad_norm": 1.363353855910285,
+      "learning_rate": 0.003,
+      "loss": 3.9801,
+      "step": 17000
+    },
+    {
+      "epoch": 0.17001,
+      "grad_norm": 1.072170500021738,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 17001
+    },
+    {
+      "epoch": 0.17002,
+      "grad_norm": 1.2286663236450293,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 17002
+    },
+    {
+      "epoch": 0.17003,
+      "grad_norm": 1.4430427644893247,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 17003
+    },
+    {
+      "epoch": 0.17004,
+      "grad_norm": 1.19606160541874,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 17004
+    },
+    {
+      "epoch": 0.17005,
+      "grad_norm": 1.1893825102433737,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 17005
+    },
+    {
+      "epoch": 0.17006,
+      "grad_norm": 1.4068763541598062,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 17006
+    },
+    {
+      "epoch": 0.17007,
+      "grad_norm": 1.2142864586823239,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 17007
+    },
+    {
+      "epoch": 0.17008,
+      "grad_norm": 1.2635706429106455,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 17008
+    },
+    {
+      "epoch": 0.17009,
+      "grad_norm": 1.1968596536847875,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 17009
+    },
+    {
+      "epoch": 0.1701,
+      "grad_norm": 1.2526564209820548,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 17010
+    },
+    {
+      "epoch": 0.17011,
+      "grad_norm": 1.1844228990235934,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 17011
+    },
+    {
+      "epoch": 0.17012,
+      "grad_norm": 1.1039154276178442,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 17012
+    },
+    {
+      "epoch": 0.17013,
+      "grad_norm": 1.54947273181041,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 17013
+    },
+    {
+      "epoch": 0.17014,
+      "grad_norm": 1.228717172082972,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 17014
+    },
+    {
+      "epoch": 0.17015,
+      "grad_norm": 1.4232491508453435,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 17015
+    },
+    {
+      "epoch": 0.17016,
+      "grad_norm": 1.330801365943393,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 17016
+    },
+    {
+      "epoch": 0.17017,
+      "grad_norm": 1.1359725998637589,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 17017
+    },
+    {
+      "epoch": 0.17018,
+      "grad_norm": 1.2227729610468783,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 17018
+    },
+    {
+      "epoch": 0.17019,
+      "grad_norm": 1.1216839963037502,
+      "learning_rate": 0.003,
+      "loss": 3.9893,
+      "step": 17019
+    },
+    {
+      "epoch": 0.1702,
+      "grad_norm": 1.4395486441580458,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 17020
+    },
+    {
+      "epoch": 0.17021,
+      "grad_norm": 1.457913775333108,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 17021
+    },
+    {
+      "epoch": 0.17022,
+      "grad_norm": 1.2730261991335328,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 17022
+    },
+    {
+      "epoch": 0.17023,
+      "grad_norm": 1.309025191561289,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 17023
+    },
+    {
+      "epoch": 0.17024,
+      "grad_norm": 1.222707520022976,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 17024
+    },
+    {
+      "epoch": 0.17025,
+      "grad_norm": 1.3516155725992163,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 17025
+    },
+    {
+      "epoch": 0.17026,
+      "grad_norm": 1.1329837729671406,
+      "learning_rate": 0.003,
+      "loss": 3.9766,
+      "step": 17026
+    },
+    {
+      "epoch": 0.17027,
+      "grad_norm": 1.3633630624929551,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 17027
+    },
+    {
+      "epoch": 0.17028,
+      "grad_norm": 1.1815339013729904,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 17028
+    },
+    {
+      "epoch": 0.17029,
+      "grad_norm": 1.303776361036439,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 17029
+    },
+    {
+      "epoch": 0.1703,
+      "grad_norm": 1.1116647282472578,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 17030
+    },
+    {
+      "epoch": 0.17031,
+      "grad_norm": 1.6837890162486113,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 17031
+    },
+    {
+      "epoch": 0.17032,
+      "grad_norm": 1.0973423997807117,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 17032
+    },
+    {
+      "epoch": 0.17033,
+      "grad_norm": 1.4553041409424958,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 17033
+    },
+    {
+      "epoch": 0.17034,
+      "grad_norm": 1.067326827134977,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 17034
+    },
+    {
+      "epoch": 0.17035,
+      "grad_norm": 1.3964409257344448,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 17035
+    },
+    {
+      "epoch": 0.17036,
+      "grad_norm": 1.26893825218419,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 17036
+    },
+    {
+      "epoch": 0.17037,
+      "grad_norm": 1.1702590465709417,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 17037
+    },
+    {
+      "epoch": 0.17038,
+      "grad_norm": 1.2759678360880577,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 17038
+    },
+    {
+      "epoch": 0.17039,
+      "grad_norm": 1.2481788520067871,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 17039
+    },
+    {
+      "epoch": 0.1704,
+      "grad_norm": 1.3008871738106516,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 17040
+    },
+    {
+      "epoch": 0.17041,
+      "grad_norm": 0.993458540634306,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 17041
+    },
+    {
+      "epoch": 0.17042,
+      "grad_norm": 1.6483842502616208,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 17042
+    },
+    {
+      "epoch": 0.17043,
+      "grad_norm": 1.1823012906917088,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 17043
+    },
+    {
+      "epoch": 0.17044,
+      "grad_norm": 1.2834132877499729,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 17044
+    },
+    {
+      "epoch": 0.17045,
+      "grad_norm": 1.3017727577823928,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 17045
+    },
+    {
+      "epoch": 0.17046,
+      "grad_norm": 1.2997819468279899,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 17046
+    },
+    {
+      "epoch": 0.17047,
+      "grad_norm": 0.9669334493330405,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 17047
+    },
+    {
+      "epoch": 0.17048,
+      "grad_norm": 1.1175094646396293,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 17048
+    },
+    {
+      "epoch": 0.17049,
+      "grad_norm": 1.243380875464536,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 17049
+    },
+    {
+      "epoch": 0.1705,
+      "grad_norm": 1.2167661535688556,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 17050
+    },
+    {
+      "epoch": 0.17051,
+      "grad_norm": 1.4073761882344242,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 17051
+    },
+    {
+      "epoch": 0.17052,
+      "grad_norm": 1.2079374224606234,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 17052
+    },
+    {
+      "epoch": 0.17053,
+      "grad_norm": 1.1232919573416105,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 17053
+    },
+    {
+      "epoch": 0.17054,
+      "grad_norm": 1.6895509107983848,
+      "learning_rate": 0.003,
+      "loss": 4.0552,
+      "step": 17054
+    },
+    {
+      "epoch": 0.17055,
+      "grad_norm": 1.1542580858819775,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 17055
+    },
+    {
+      "epoch": 0.17056,
+      "grad_norm": 1.208307073301473,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 17056
+    },
+    {
+      "epoch": 0.17057,
+      "grad_norm": 1.3242158599574259,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 17057
+    },
+    {
+      "epoch": 0.17058,
+      "grad_norm": 1.3255330368926068,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 17058
+    },
+    {
+      "epoch": 0.17059,
+      "grad_norm": 1.1849168511846713,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 17059
+    },
+    {
+      "epoch": 0.1706,
+      "grad_norm": 1.3666009882279797,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 17060
+    },
+    {
+      "epoch": 0.17061,
+      "grad_norm": 1.3262779207828668,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 17061
+    },
+    {
+      "epoch": 0.17062,
+      "grad_norm": 1.0173614017135213,
+      "learning_rate": 0.003,
+      "loss": 3.9673,
+      "step": 17062
+    },
+    {
+      "epoch": 0.17063,
+      "grad_norm": 1.2363760775920116,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 17063
+    },
+    {
+      "epoch": 0.17064,
+      "grad_norm": 1.0837262558950076,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 17064
+    },
+    {
+      "epoch": 0.17065,
+      "grad_norm": 1.3559097648251028,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 17065
+    },
+    {
+      "epoch": 0.17066,
+      "grad_norm": 1.1819821915593431,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 17066
+    },
+    {
+      "epoch": 0.17067,
+      "grad_norm": 1.386641191079537,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 17067
+    },
+    {
+      "epoch": 0.17068,
+      "grad_norm": 1.1292876769780475,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 17068
+    },
+    {
+      "epoch": 0.17069,
+      "grad_norm": 1.3903157711241252,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 17069
+    },
+    {
+      "epoch": 0.1707,
+      "grad_norm": 1.136373967026405,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 17070
+    },
+    {
+      "epoch": 0.17071,
+      "grad_norm": 1.501179003007741,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 17071
+    },
+    {
+      "epoch": 0.17072,
+      "grad_norm": 0.9506830073519514,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 17072
+    },
+    {
+      "epoch": 0.17073,
+      "grad_norm": 1.3292716085551004,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 17073
+    },
+    {
+      "epoch": 0.17074,
+      "grad_norm": 1.3345983635658956,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 17074
+    },
+    {
+      "epoch": 0.17075,
+      "grad_norm": 1.390323079016144,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 17075
+    },
+    {
+      "epoch": 0.17076,
+      "grad_norm": 1.3772049791967669,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 17076
+    },
+    {
+      "epoch": 0.17077,
+      "grad_norm": 1.3685662444124322,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 17077
+    },
+    {
+      "epoch": 0.17078,
+      "grad_norm": 1.2048938743876443,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 17078
+    },
+    {
+      "epoch": 0.17079,
+      "grad_norm": 1.2550198188663826,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 17079
+    },
+    {
+      "epoch": 0.1708,
+      "grad_norm": 1.2509248960232697,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 17080
+    },
+    {
+      "epoch": 0.17081,
+      "grad_norm": 1.3155347659383203,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 17081
+    },
+    {
+      "epoch": 0.17082,
+      "grad_norm": 1.2297799179188096,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 17082
+    },
+    {
+      "epoch": 0.17083,
+      "grad_norm": 1.2401252434978838,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 17083
+    },
+    {
+      "epoch": 0.17084,
+      "grad_norm": 1.119392793832822,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 17084
+    },
+    {
+      "epoch": 0.17085,
+      "grad_norm": 1.1671328595938733,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 17085
+    },
+    {
+      "epoch": 0.17086,
+      "grad_norm": 1.384826264679687,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 17086
+    },
+    {
+      "epoch": 0.17087,
+      "grad_norm": 1.5263313659422078,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 17087
+    },
+    {
+      "epoch": 0.17088,
+      "grad_norm": 0.9182959568931038,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 17088
+    },
+    {
+      "epoch": 0.17089,
+      "grad_norm": 1.284208012928188,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 17089
+    },
+    {
+      "epoch": 0.1709,
+      "grad_norm": 1.2661764940608917,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 17090
+    },
+    {
+      "epoch": 0.17091,
+      "grad_norm": 1.280902038637925,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 17091
+    },
+    {
+      "epoch": 0.17092,
+      "grad_norm": 1.382288880310461,
+      "learning_rate": 0.003,
+      "loss": 3.9877,
+      "step": 17092
+    },
+    {
+      "epoch": 0.17093,
+      "grad_norm": 1.1166438612003322,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 17093
+    },
+    {
+      "epoch": 0.17094,
+      "grad_norm": 1.5879639454360261,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 17094
+    },
+    {
+      "epoch": 0.17095,
+      "grad_norm": 1.242611008549483,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 17095
+    },
+    {
+      "epoch": 0.17096,
+      "grad_norm": 1.3686366406113646,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 17096
+    },
+    {
+      "epoch": 0.17097,
+      "grad_norm": 1.2406692703157454,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 17097
+    },
+    {
+      "epoch": 0.17098,
+      "grad_norm": 1.3392783702470064,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 17098
+    },
+    {
+      "epoch": 0.17099,
+      "grad_norm": 1.3207583531649192,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 17099
+    },
+    {
+      "epoch": 0.171,
+      "grad_norm": 0.9115212843188456,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 17100
+    },
+    {
+      "epoch": 0.17101,
+      "grad_norm": 1.4207831207188837,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 17101
+    },
+    {
+      "epoch": 0.17102,
+      "grad_norm": 1.0106793121437063,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 17102
+    },
+    {
+      "epoch": 0.17103,
+      "grad_norm": 1.4870568757014018,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 17103
+    },
+    {
+      "epoch": 0.17104,
+      "grad_norm": 1.1297006257174922,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 17104
+    },
+    {
+      "epoch": 0.17105,
+      "grad_norm": 1.4574497121201375,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 17105
+    },
+    {
+      "epoch": 0.17106,
+      "grad_norm": 1.279203515550617,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 17106
+    },
+    {
+      "epoch": 0.17107,
+      "grad_norm": 1.120641556247558,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 17107
+    },
+    {
+      "epoch": 0.17108,
+      "grad_norm": 1.3611441573055538,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 17108
+    },
+    {
+      "epoch": 0.17109,
+      "grad_norm": 1.1597597254319019,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 17109
+    },
+    {
+      "epoch": 0.1711,
+      "grad_norm": 1.623200123412091,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 17110
+    },
+    {
+      "epoch": 0.17111,
+      "grad_norm": 1.0573921747614077,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 17111
+    },
+    {
+      "epoch": 0.17112,
+      "grad_norm": 1.3914494160558084,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 17112
+    },
+    {
+      "epoch": 0.17113,
+      "grad_norm": 1.0537510192201738,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 17113
+    },
+    {
+      "epoch": 0.17114,
+      "grad_norm": 1.4343479118895226,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 17114
+    },
+    {
+      "epoch": 0.17115,
+      "grad_norm": 1.0597295689465882,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 17115
+    },
+    {
+      "epoch": 0.17116,
+      "grad_norm": 1.4008441502145679,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 17116
+    },
+    {
+      "epoch": 0.17117,
+      "grad_norm": 1.1533115489494603,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 17117
+    },
+    {
+      "epoch": 0.17118,
+      "grad_norm": 1.3389173282580609,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 17118
+    },
+    {
+      "epoch": 0.17119,
+      "grad_norm": 1.0763942311886536,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 17119
+    },
+    {
+      "epoch": 0.1712,
+      "grad_norm": 1.3857008464880827,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 17120
+    },
+    {
+      "epoch": 0.17121,
+      "grad_norm": 1.1795518489267065,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 17121
+    },
+    {
+      "epoch": 0.17122,
+      "grad_norm": 1.2837531808444262,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 17122
+    },
+    {
+      "epoch": 0.17123,
+      "grad_norm": 1.213157552417634,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 17123
+    },
+    {
+      "epoch": 0.17124,
+      "grad_norm": 1.1134858549742377,
+      "learning_rate": 0.003,
+      "loss": 3.9719,
+      "step": 17124
+    },
+    {
+      "epoch": 0.17125,
+      "grad_norm": 1.256476065981997,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 17125
+    },
+    {
+      "epoch": 0.17126,
+      "grad_norm": 1.2994744510899439,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 17126
+    },
+    {
+      "epoch": 0.17127,
+      "grad_norm": 1.3261576115703024,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 17127
+    },
+    {
+      "epoch": 0.17128,
+      "grad_norm": 1.509759654632934,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 17128
+    },
+    {
+      "epoch": 0.17129,
+      "grad_norm": 1.157129471144684,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 17129
+    },
+    {
+      "epoch": 0.1713,
+      "grad_norm": 1.3506595690549228,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 17130
+    },
+    {
+      "epoch": 0.17131,
+      "grad_norm": 1.1810803692557885,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 17131
+    },
+    {
+      "epoch": 0.17132,
+      "grad_norm": 1.198309547444243,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 17132
+    },
+    {
+      "epoch": 0.17133,
+      "grad_norm": 1.2633420447354877,
+      "learning_rate": 0.003,
+      "loss": 4.0461,
+      "step": 17133
+    },
+    {
+      "epoch": 0.17134,
+      "grad_norm": 1.2609231995929508,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 17134
+    },
+    {
+      "epoch": 0.17135,
+      "grad_norm": 1.0653434516998657,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 17135
+    },
+    {
+      "epoch": 0.17136,
+      "grad_norm": 1.3862357041857178,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 17136
+    },
+    {
+      "epoch": 0.17137,
+      "grad_norm": 1.1175033017808103,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 17137
+    },
+    {
+      "epoch": 0.17138,
+      "grad_norm": 1.5237743028823334,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 17138
+    },
+    {
+      "epoch": 0.17139,
+      "grad_norm": 1.175540536081767,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 17139
+    },
+    {
+      "epoch": 0.1714,
+      "grad_norm": 1.6728633199713923,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 17140
+    },
+    {
+      "epoch": 0.17141,
+      "grad_norm": 1.308263822363612,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 17141
+    },
+    {
+      "epoch": 0.17142,
+      "grad_norm": 1.1339564662254087,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 17142
+    },
+    {
+      "epoch": 0.17143,
+      "grad_norm": 1.2650050455155888,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 17143
+    },
+    {
+      "epoch": 0.17144,
+      "grad_norm": 1.163011141852609,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 17144
+    },
+    {
+      "epoch": 0.17145,
+      "grad_norm": 1.5102254688904788,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 17145
+    },
+    {
+      "epoch": 0.17146,
+      "grad_norm": 1.2037448878305657,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 17146
+    },
+    {
+      "epoch": 0.17147,
+      "grad_norm": 1.5557240481772199,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 17147
+    },
+    {
+      "epoch": 0.17148,
+      "grad_norm": 1.0974950524747786,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 17148
+    },
+    {
+      "epoch": 0.17149,
+      "grad_norm": 1.3844767753715188,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 17149
+    },
+    {
+      "epoch": 0.1715,
+      "grad_norm": 1.319276395045874,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 17150
+    },
+    {
+      "epoch": 0.17151,
+      "grad_norm": 1.026204186557338,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 17151
+    },
+    {
+      "epoch": 0.17152,
+      "grad_norm": 1.2994904639201104,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 17152
+    },
+    {
+      "epoch": 0.17153,
+      "grad_norm": 1.1432778565248294,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 17153
+    },
+    {
+      "epoch": 0.17154,
+      "grad_norm": 1.3697557622494088,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 17154
+    },
+    {
+      "epoch": 0.17155,
+      "grad_norm": 0.9894500157329774,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 17155
+    },
+    {
+      "epoch": 0.17156,
+      "grad_norm": 1.3056644822045587,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 17156
+    },
+    {
+      "epoch": 0.17157,
+      "grad_norm": 1.282287938433088,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 17157
+    },
+    {
+      "epoch": 0.17158,
+      "grad_norm": 1.5061809099266898,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 17158
+    },
+    {
+      "epoch": 0.17159,
+      "grad_norm": 1.1371383950913954,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 17159
+    },
+    {
+      "epoch": 0.1716,
+      "grad_norm": 1.1832037804357762,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 17160
+    },
+    {
+      "epoch": 0.17161,
+      "grad_norm": 1.2545340918402512,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 17161
+    },
+    {
+      "epoch": 0.17162,
+      "grad_norm": 1.250357599105719,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 17162
+    },
+    {
+      "epoch": 0.17163,
+      "grad_norm": 1.4133447120518978,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 17163
+    },
+    {
+      "epoch": 0.17164,
+      "grad_norm": 1.1332442236888076,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 17164
+    },
+    {
+      "epoch": 0.17165,
+      "grad_norm": 1.5546016808686114,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 17165
+    },
+    {
+      "epoch": 0.17166,
+      "grad_norm": 1.0412075628559774,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 17166
+    },
+    {
+      "epoch": 0.17167,
+      "grad_norm": 1.4810738774004917,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 17167
+    },
+    {
+      "epoch": 0.17168,
+      "grad_norm": 1.0665666006787518,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 17168
+    },
+    {
+      "epoch": 0.17169,
+      "grad_norm": 1.3356993977397127,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 17169
+    },
+    {
+      "epoch": 0.1717,
+      "grad_norm": 1.35007975105471,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 17170
+    },
+    {
+      "epoch": 0.17171,
+      "grad_norm": 1.2221539025536585,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 17171
+    },
+    {
+      "epoch": 0.17172,
+      "grad_norm": 1.2277033051888775,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 17172
+    },
+    {
+      "epoch": 0.17173,
+      "grad_norm": 1.2042206000526587,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 17173
+    },
+    {
+      "epoch": 0.17174,
+      "grad_norm": 1.2356537217268861,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 17174
+    },
+    {
+      "epoch": 0.17175,
+      "grad_norm": 1.2937051183249795,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 17175
+    },
+    {
+      "epoch": 0.17176,
+      "grad_norm": 1.1750530246637987,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 17176
+    },
+    {
+      "epoch": 0.17177,
+      "grad_norm": 1.1435867677060834,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 17177
+    },
+    {
+      "epoch": 0.17178,
+      "grad_norm": 1.1244967318359802,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 17178
+    },
+    {
+      "epoch": 0.17179,
+      "grad_norm": 1.4152304529583006,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 17179
+    },
+    {
+      "epoch": 0.1718,
+      "grad_norm": 1.1261904961191211,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 17180
+    },
+    {
+      "epoch": 0.17181,
+      "grad_norm": 1.5335060142886194,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 17181
+    },
+    {
+      "epoch": 0.17182,
+      "grad_norm": 1.2391843231048232,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 17182
+    },
+    {
+      "epoch": 0.17183,
+      "grad_norm": 1.2088367421460817,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 17183
+    },
+    {
+      "epoch": 0.17184,
+      "grad_norm": 1.267094285490875,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 17184
+    },
+    {
+      "epoch": 0.17185,
+      "grad_norm": 1.0561147685321686,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 17185
+    },
+    {
+      "epoch": 0.17186,
+      "grad_norm": 1.3648888221954985,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 17186
+    },
+    {
+      "epoch": 0.17187,
+      "grad_norm": 1.514129993387543,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 17187
+    },
+    {
+      "epoch": 0.17188,
+      "grad_norm": 1.096699626012865,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 17188
+    },
+    {
+      "epoch": 0.17189,
+      "grad_norm": 1.4513064755452996,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 17189
+    },
+    {
+      "epoch": 0.1719,
+      "grad_norm": 1.0785963548641389,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 17190
+    },
+    {
+      "epoch": 0.17191,
+      "grad_norm": 1.3173078321818739,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 17191
+    },
+    {
+      "epoch": 0.17192,
+      "grad_norm": 1.221197625265541,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 17192
+    },
+    {
+      "epoch": 0.17193,
+      "grad_norm": 1.1767196120016654,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 17193
+    },
+    {
+      "epoch": 0.17194,
+      "grad_norm": 1.2755338735482418,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 17194
+    },
+    {
+      "epoch": 0.17195,
+      "grad_norm": 1.1647728885087472,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 17195
+    },
+    {
+      "epoch": 0.17196,
+      "grad_norm": 1.3491590116637575,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 17196
+    },
+    {
+      "epoch": 0.17197,
+      "grad_norm": 1.0154907883507833,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 17197
+    },
+    {
+      "epoch": 0.17198,
+      "grad_norm": 1.6514837011453636,
+      "learning_rate": 0.003,
+      "loss": 4.0632,
+      "step": 17198
+    },
+    {
+      "epoch": 0.17199,
+      "grad_norm": 1.2249600301431616,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 17199
+    },
+    {
+      "epoch": 0.172,
+      "grad_norm": 1.6335846575515012,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 17200
+    },
+    {
+      "epoch": 0.17201,
+      "grad_norm": 0.970957332720887,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 17201
+    },
+    {
+      "epoch": 0.17202,
+      "grad_norm": 1.4736329370686672,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 17202
+    },
+    {
+      "epoch": 0.17203,
+      "grad_norm": 1.1642100779363067,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 17203
+    },
+    {
+      "epoch": 0.17204,
+      "grad_norm": 1.3738026949477042,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 17204
+    },
+    {
+      "epoch": 0.17205,
+      "grad_norm": 1.3653211359108215,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 17205
+    },
+    {
+      "epoch": 0.17206,
+      "grad_norm": 1.1810839310821635,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 17206
+    },
+    {
+      "epoch": 0.17207,
+      "grad_norm": 1.2788381610917945,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 17207
+    },
+    {
+      "epoch": 0.17208,
+      "grad_norm": 1.343528758148611,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 17208
+    },
+    {
+      "epoch": 0.17209,
+      "grad_norm": 1.2044259761732348,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 17209
+    },
+    {
+      "epoch": 0.1721,
+      "grad_norm": 1.282205491394408,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 17210
+    },
+    {
+      "epoch": 0.17211,
+      "grad_norm": 1.441980895914016,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 17211
+    },
+    {
+      "epoch": 0.17212,
+      "grad_norm": 1.050487950193999,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 17212
+    },
+    {
+      "epoch": 0.17213,
+      "grad_norm": 1.5050463177550548,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 17213
+    },
+    {
+      "epoch": 0.17214,
+      "grad_norm": 1.1403063493006569,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 17214
+    },
+    {
+      "epoch": 0.17215,
+      "grad_norm": 1.3687090498085275,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 17215
+    },
+    {
+      "epoch": 0.17216,
+      "grad_norm": 1.149331732564463,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 17216
+    },
+    {
+      "epoch": 0.17217,
+      "grad_norm": 1.4725985013833562,
+      "learning_rate": 0.003,
+      "loss": 3.9797,
+      "step": 17217
+    },
+    {
+      "epoch": 0.17218,
+      "grad_norm": 1.1358280974247414,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 17218
+    },
+    {
+      "epoch": 0.17219,
+      "grad_norm": 1.3384339930647065,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 17219
+    },
+    {
+      "epoch": 0.1722,
+      "grad_norm": 1.0902169071833985,
+      "learning_rate": 0.003,
+      "loss": 3.9804,
+      "step": 17220
+    },
+    {
+      "epoch": 0.17221,
+      "grad_norm": 1.473353818398544,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 17221
+    },
+    {
+      "epoch": 0.17222,
+      "grad_norm": 0.904154848913335,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 17222
+    },
+    {
+      "epoch": 0.17223,
+      "grad_norm": 1.233038973013818,
+      "learning_rate": 0.003,
+      "loss": 3.9751,
+      "step": 17223
+    },
+    {
+      "epoch": 0.17224,
+      "grad_norm": 1.2218626090563223,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 17224
+    },
+    {
+      "epoch": 0.17225,
+      "grad_norm": 1.5852766020655138,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 17225
+    },
+    {
+      "epoch": 0.17226,
+      "grad_norm": 1.0144098208355075,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 17226
+    },
+    {
+      "epoch": 0.17227,
+      "grad_norm": 1.6856679111747674,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 17227
+    },
+    {
+      "epoch": 0.17228,
+      "grad_norm": 1.1674976534137596,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 17228
+    },
+    {
+      "epoch": 0.17229,
+      "grad_norm": 1.1846419527574505,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 17229
+    },
+    {
+      "epoch": 0.1723,
+      "grad_norm": 1.3372635062550227,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 17230
+    },
+    {
+      "epoch": 0.17231,
+      "grad_norm": 1.4360328784576093,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 17231
+    },
+    {
+      "epoch": 0.17232,
+      "grad_norm": 1.167948325233245,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 17232
+    },
+    {
+      "epoch": 0.17233,
+      "grad_norm": 1.5103516999920048,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 17233
+    },
+    {
+      "epoch": 0.17234,
+      "grad_norm": 1.1718884631666613,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 17234
+    },
+    {
+      "epoch": 0.17235,
+      "grad_norm": 1.256899195330911,
+      "learning_rate": 0.003,
+      "loss": 3.9839,
+      "step": 17235
+    },
+    {
+      "epoch": 0.17236,
+      "grad_norm": 1.3231041800701269,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 17236
+    },
+    {
+      "epoch": 0.17237,
+      "grad_norm": 1.2089819685493517,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 17237
+    },
+    {
+      "epoch": 0.17238,
+      "grad_norm": 1.3938837032385776,
+      "learning_rate": 0.003,
+      "loss": 4.0432,
+      "step": 17238
+    },
+    {
+      "epoch": 0.17239,
+      "grad_norm": 1.0986323428112195,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 17239
+    },
+    {
+      "epoch": 0.1724,
+      "grad_norm": 1.4926533238792659,
+      "learning_rate": 0.003,
+      "loss": 3.9638,
+      "step": 17240
+    },
+    {
+      "epoch": 0.17241,
+      "grad_norm": 0.9565770312520956,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 17241
+    },
+    {
+      "epoch": 0.17242,
+      "grad_norm": 1.4964969318330428,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 17242
+    },
+    {
+      "epoch": 0.17243,
+      "grad_norm": 1.078719932273665,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 17243
+    },
+    {
+      "epoch": 0.17244,
+      "grad_norm": 1.4107399149627897,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 17244
+    },
+    {
+      "epoch": 0.17245,
+      "grad_norm": 1.205665776851779,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 17245
+    },
+    {
+      "epoch": 0.17246,
+      "grad_norm": 1.4098420085442245,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 17246
+    },
+    {
+      "epoch": 0.17247,
+      "grad_norm": 1.1738218195027936,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 17247
+    },
+    {
+      "epoch": 0.17248,
+      "grad_norm": 1.5534211984480004,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 17248
+    },
+    {
+      "epoch": 0.17249,
+      "grad_norm": 1.1059230496993113,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 17249
+    },
+    {
+      "epoch": 0.1725,
+      "grad_norm": 1.2068057270289108,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 17250
+    },
+    {
+      "epoch": 0.17251,
+      "grad_norm": 1.3653391554834,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 17251
+    },
+    {
+      "epoch": 0.17252,
+      "grad_norm": 0.9652249172910172,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 17252
+    },
+    {
+      "epoch": 0.17253,
+      "grad_norm": 1.3883368862028067,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 17253
+    },
+    {
+      "epoch": 0.17254,
+      "grad_norm": 1.1475381505515283,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 17254
+    },
+    {
+      "epoch": 0.17255,
+      "grad_norm": 1.1818737148016185,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 17255
+    },
+    {
+      "epoch": 0.17256,
+      "grad_norm": 1.3480812015205677,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 17256
+    },
+    {
+      "epoch": 0.17257,
+      "grad_norm": 1.3232272920505788,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 17257
+    },
+    {
+      "epoch": 0.17258,
+      "grad_norm": 1.5173980234137427,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 17258
+    },
+    {
+      "epoch": 0.17259,
+      "grad_norm": 1.2723554794602032,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 17259
+    },
+    {
+      "epoch": 0.1726,
+      "grad_norm": 1.1888805114386718,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 17260
+    },
+    {
+      "epoch": 0.17261,
+      "grad_norm": 1.2247012755221713,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 17261
+    },
+    {
+      "epoch": 0.17262,
+      "grad_norm": 1.148262136354316,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 17262
+    },
+    {
+      "epoch": 0.17263,
+      "grad_norm": 1.3775928289484927,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 17263
+    },
+    {
+      "epoch": 0.17264,
+      "grad_norm": 1.1251068551429848,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 17264
+    },
+    {
+      "epoch": 0.17265,
+      "grad_norm": 1.3824170647249685,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 17265
+    },
+    {
+      "epoch": 0.17266,
+      "grad_norm": 0.871810097084379,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 17266
+    },
+    {
+      "epoch": 0.17267,
+      "grad_norm": 0.9651045548460154,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 17267
+    },
+    {
+      "epoch": 0.17268,
+      "grad_norm": 1.2767085120092714,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 17268
+    },
+    {
+      "epoch": 0.17269,
+      "grad_norm": 1.1721368271222345,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 17269
+    },
+    {
+      "epoch": 0.1727,
+      "grad_norm": 1.1223776476237222,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 17270
+    },
+    {
+      "epoch": 0.17271,
+      "grad_norm": 1.311417321913556,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 17271
+    },
+    {
+      "epoch": 0.17272,
+      "grad_norm": 1.27141658246143,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 17272
+    },
+    {
+      "epoch": 0.17273,
+      "grad_norm": 1.4113108900497515,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 17273
+    },
+    {
+      "epoch": 0.17274,
+      "grad_norm": 1.3858642060179733,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 17274
+    },
+    {
+      "epoch": 0.17275,
+      "grad_norm": 1.1843334908306122,
+      "learning_rate": 0.003,
+      "loss": 3.9792,
+      "step": 17275
+    },
+    {
+      "epoch": 0.17276,
+      "grad_norm": 1.331721774774919,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 17276
+    },
+    {
+      "epoch": 0.17277,
+      "grad_norm": 1.2843503356977932,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 17277
+    },
+    {
+      "epoch": 0.17278,
+      "grad_norm": 1.1975980849144978,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 17278
+    },
+    {
+      "epoch": 0.17279,
+      "grad_norm": 1.4014082743567955,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 17279
+    },
+    {
+      "epoch": 0.1728,
+      "grad_norm": 1.06715525384562,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 17280
+    },
+    {
+      "epoch": 0.17281,
+      "grad_norm": 1.62746983560525,
+      "learning_rate": 0.003,
+      "loss": 3.9742,
+      "step": 17281
+    },
+    {
+      "epoch": 0.17282,
+      "grad_norm": 0.8849603978508457,
+      "learning_rate": 0.003,
+      "loss": 3.9886,
+      "step": 17282
+    },
+    {
+      "epoch": 0.17283,
+      "grad_norm": 1.3812091147021786,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 17283
+    },
+    {
+      "epoch": 0.17284,
+      "grad_norm": 1.2985208598972184,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 17284
+    },
+    {
+      "epoch": 0.17285,
+      "grad_norm": 1.228938514266029,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 17285
+    },
+    {
+      "epoch": 0.17286,
+      "grad_norm": 1.3800775260071658,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 17286
+    },
+    {
+      "epoch": 0.17287,
+      "grad_norm": 1.1335184142844406,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 17287
+    },
+    {
+      "epoch": 0.17288,
+      "grad_norm": 1.6187836779866542,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 17288
+    },
+    {
+      "epoch": 0.17289,
+      "grad_norm": 1.0880921795295901,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 17289
+    },
+    {
+      "epoch": 0.1729,
+      "grad_norm": 1.3183469402744639,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 17290
+    },
+    {
+      "epoch": 0.17291,
+      "grad_norm": 1.1312108474649964,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 17291
+    },
+    {
+      "epoch": 0.17292,
+      "grad_norm": 1.2583469188890024,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 17292
+    },
+    {
+      "epoch": 0.17293,
+      "grad_norm": 1.0036171406305894,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 17293
+    },
+    {
+      "epoch": 0.17294,
+      "grad_norm": 1.2717671981000425,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 17294
+    },
+    {
+      "epoch": 0.17295,
+      "grad_norm": 1.3573026646066382,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 17295
+    },
+    {
+      "epoch": 0.17296,
+      "grad_norm": 1.1595736297044072,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 17296
+    },
+    {
+      "epoch": 0.17297,
+      "grad_norm": 1.3391544056038518,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 17297
+    },
+    {
+      "epoch": 0.17298,
+      "grad_norm": 1.3052310544504753,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 17298
+    },
+    {
+      "epoch": 0.17299,
+      "grad_norm": 1.3962524411100379,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 17299
+    },
+    {
+      "epoch": 0.173,
+      "grad_norm": 1.313060365360365,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 17300
+    },
+    {
+      "epoch": 0.17301,
+      "grad_norm": 1.283124319072189,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 17301
+    },
+    {
+      "epoch": 0.17302,
+      "grad_norm": 1.2925301300118133,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 17302
+    },
+    {
+      "epoch": 0.17303,
+      "grad_norm": 1.1853360077635136,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 17303
+    },
+    {
+      "epoch": 0.17304,
+      "grad_norm": 1.20723607947912,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 17304
+    },
+    {
+      "epoch": 0.17305,
+      "grad_norm": 1.3688554411898706,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 17305
+    },
+    {
+      "epoch": 0.17306,
+      "grad_norm": 1.2597179043885396,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 17306
+    },
+    {
+      "epoch": 0.17307,
+      "grad_norm": 1.3381299531733732,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 17307
+    },
+    {
+      "epoch": 0.17308,
+      "grad_norm": 1.0221588315818542,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 17308
+    },
+    {
+      "epoch": 0.17309,
+      "grad_norm": 1.4406904812727543,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 17309
+    },
+    {
+      "epoch": 0.1731,
+      "grad_norm": 1.0715062331314942,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 17310
+    },
+    {
+      "epoch": 0.17311,
+      "grad_norm": 1.4638372254172154,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 17311
+    },
+    {
+      "epoch": 0.17312,
+      "grad_norm": 1.0568515584991716,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 17312
+    },
+    {
+      "epoch": 0.17313,
+      "grad_norm": 1.39433827191643,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 17313
+    },
+    {
+      "epoch": 0.17314,
+      "grad_norm": 0.9855272687414648,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 17314
+    },
+    {
+      "epoch": 0.17315,
+      "grad_norm": 1.2762421632760321,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 17315
+    },
+    {
+      "epoch": 0.17316,
+      "grad_norm": 1.077189428086061,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 17316
+    },
+    {
+      "epoch": 0.17317,
+      "grad_norm": 1.667953493782491,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 17317
+    },
+    {
+      "epoch": 0.17318,
+      "grad_norm": 1.3129642827230545,
+      "learning_rate": 0.003,
+      "loss": 4.0539,
+      "step": 17318
+    },
+    {
+      "epoch": 0.17319,
+      "grad_norm": 1.225283913272662,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 17319
+    },
+    {
+      "epoch": 0.1732,
+      "grad_norm": 1.4212125329003817,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 17320
+    },
+    {
+      "epoch": 0.17321,
+      "grad_norm": 1.3433281763166263,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 17321
+    },
+    {
+      "epoch": 0.17322,
+      "grad_norm": 1.2880076072785773,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 17322
+    },
+    {
+      "epoch": 0.17323,
+      "grad_norm": 1.2690146848678605,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 17323
+    },
+    {
+      "epoch": 0.17324,
+      "grad_norm": 1.1182734624267912,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 17324
+    },
+    {
+      "epoch": 0.17325,
+      "grad_norm": 1.2846627438473532,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 17325
+    },
+    {
+      "epoch": 0.17326,
+      "grad_norm": 1.4021564989948474,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 17326
+    },
+    {
+      "epoch": 0.17327,
+      "grad_norm": 1.0692247719863897,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 17327
+    },
+    {
+      "epoch": 0.17328,
+      "grad_norm": 1.3707062697557013,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 17328
+    },
+    {
+      "epoch": 0.17329,
+      "grad_norm": 1.0903184574512028,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 17329
+    },
+    {
+      "epoch": 0.1733,
+      "grad_norm": 1.359809657985758,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 17330
+    },
+    {
+      "epoch": 0.17331,
+      "grad_norm": 1.276241963281705,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 17331
+    },
+    {
+      "epoch": 0.17332,
+      "grad_norm": 1.447065159401617,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 17332
+    },
+    {
+      "epoch": 0.17333,
+      "grad_norm": 1.0959714650664985,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 17333
+    },
+    {
+      "epoch": 0.17334,
+      "grad_norm": 1.2519159045010237,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 17334
+    },
+    {
+      "epoch": 0.17335,
+      "grad_norm": 1.6111332627475863,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 17335
+    },
+    {
+      "epoch": 0.17336,
+      "grad_norm": 1.0054733076695719,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 17336
+    },
+    {
+      "epoch": 0.17337,
+      "grad_norm": 1.4945123114552656,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 17337
+    },
+    {
+      "epoch": 0.17338,
+      "grad_norm": 1.0235002207606916,
+      "learning_rate": 0.003,
+      "loss": 3.974,
+      "step": 17338
+    },
+    {
+      "epoch": 0.17339,
+      "grad_norm": 1.4223634327208732,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 17339
+    },
+    {
+      "epoch": 0.1734,
+      "grad_norm": 1.1891548110046788,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 17340
+    },
+    {
+      "epoch": 0.17341,
+      "grad_norm": 1.2678570960019357,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 17341
+    },
+    {
+      "epoch": 0.17342,
+      "grad_norm": 1.1946809146193087,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 17342
+    },
+    {
+      "epoch": 0.17343,
+      "grad_norm": 1.3417607658892123,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 17343
+    },
+    {
+      "epoch": 0.17344,
+      "grad_norm": 1.279236075035203,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 17344
+    },
+    {
+      "epoch": 0.17345,
+      "grad_norm": 1.6873037813875194,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 17345
+    },
+    {
+      "epoch": 0.17346,
+      "grad_norm": 1.0906193772457748,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 17346
+    },
+    {
+      "epoch": 0.17347,
+      "grad_norm": 1.301669503890121,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 17347
+    },
+    {
+      "epoch": 0.17348,
+      "grad_norm": 1.1201010663394788,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 17348
+    },
+    {
+      "epoch": 0.17349,
+      "grad_norm": 1.2822773260180442,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 17349
+    },
+    {
+      "epoch": 0.1735,
+      "grad_norm": 1.269482955451877,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 17350
+    },
+    {
+      "epoch": 0.17351,
+      "grad_norm": 1.2707305975348144,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 17351
+    },
+    {
+      "epoch": 0.17352,
+      "grad_norm": 1.1494463357967402,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 17352
+    },
+    {
+      "epoch": 0.17353,
+      "grad_norm": 1.0918376495855586,
+      "learning_rate": 0.003,
+      "loss": 3.9706,
+      "step": 17353
+    },
+    {
+      "epoch": 0.17354,
+      "grad_norm": 1.2566020425973183,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 17354
+    },
+    {
+      "epoch": 0.17355,
+      "grad_norm": 1.2542980099734522,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 17355
+    },
+    {
+      "epoch": 0.17356,
+      "grad_norm": 1.4794482403087317,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 17356
+    },
+    {
+      "epoch": 0.17357,
+      "grad_norm": 1.1625413074001234,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 17357
+    },
+    {
+      "epoch": 0.17358,
+      "grad_norm": 1.2878581349328948,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 17358
+    },
+    {
+      "epoch": 0.17359,
+      "grad_norm": 1.1218207169793912,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 17359
+    },
+    {
+      "epoch": 0.1736,
+      "grad_norm": 1.2486176787250105,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 17360
+    },
+    {
+      "epoch": 0.17361,
+      "grad_norm": 1.266343297913411,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 17361
+    },
+    {
+      "epoch": 0.17362,
+      "grad_norm": 1.479733028324367,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 17362
+    },
+    {
+      "epoch": 0.17363,
+      "grad_norm": 1.174357803273249,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 17363
+    },
+    {
+      "epoch": 0.17364,
+      "grad_norm": 1.3289916018824397,
+      "learning_rate": 0.003,
+      "loss": 3.9763,
+      "step": 17364
+    },
+    {
+      "epoch": 0.17365,
+      "grad_norm": 1.2266454626377468,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 17365
+    },
+    {
+      "epoch": 0.17366,
+      "grad_norm": 1.1761585123971194,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 17366
+    },
+    {
+      "epoch": 0.17367,
+      "grad_norm": 1.3180785258166257,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 17367
+    },
+    {
+      "epoch": 0.17368,
+      "grad_norm": 1.1512397103307213,
+      "learning_rate": 0.003,
+      "loss": 3.9785,
+      "step": 17368
+    },
+    {
+      "epoch": 0.17369,
+      "grad_norm": 1.3030786067405742,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 17369
+    },
+    {
+      "epoch": 0.1737,
+      "grad_norm": 1.4600400342286277,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 17370
+    },
+    {
+      "epoch": 0.17371,
+      "grad_norm": 0.9789559321499843,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 17371
+    },
+    {
+      "epoch": 0.17372,
+      "grad_norm": 1.406150001021043,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 17372
+    },
+    {
+      "epoch": 0.17373,
+      "grad_norm": 1.1831169481340245,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 17373
+    },
+    {
+      "epoch": 0.17374,
+      "grad_norm": 1.4770654324579267,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 17374
+    },
+    {
+      "epoch": 0.17375,
+      "grad_norm": 1.1501125608044909,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 17375
+    },
+    {
+      "epoch": 0.17376,
+      "grad_norm": 1.2110112945636147,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 17376
+    },
+    {
+      "epoch": 0.17377,
+      "grad_norm": 1.3738972981009299,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 17377
+    },
+    {
+      "epoch": 0.17378,
+      "grad_norm": 1.117572575502471,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 17378
+    },
+    {
+      "epoch": 0.17379,
+      "grad_norm": 1.3860507283902452,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 17379
+    },
+    {
+      "epoch": 0.1738,
+      "grad_norm": 1.0779684375512073,
+      "learning_rate": 0.003,
+      "loss": 3.9692,
+      "step": 17380
+    },
+    {
+      "epoch": 0.17381,
+      "grad_norm": 1.5955300999519237,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 17381
+    },
+    {
+      "epoch": 0.17382,
+      "grad_norm": 1.2292224660826188,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 17382
+    },
+    {
+      "epoch": 0.17383,
+      "grad_norm": 1.3205397866165893,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 17383
+    },
+    {
+      "epoch": 0.17384,
+      "grad_norm": 1.1634386312620115,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 17384
+    },
+    {
+      "epoch": 0.17385,
+      "grad_norm": 1.33842741762835,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 17385
+    },
+    {
+      "epoch": 0.17386,
+      "grad_norm": 1.0875714130899277,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 17386
+    },
+    {
+      "epoch": 0.17387,
+      "grad_norm": 1.3582376011663713,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 17387
+    },
+    {
+      "epoch": 0.17388,
+      "grad_norm": 1.1678374075460847,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 17388
+    },
+    {
+      "epoch": 0.17389,
+      "grad_norm": 1.211558868854294,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 17389
+    },
+    {
+      "epoch": 0.1739,
+      "grad_norm": 1.4528085360953462,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 17390
+    },
+    {
+      "epoch": 0.17391,
+      "grad_norm": 1.119391271340527,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 17391
+    },
+    {
+      "epoch": 0.17392,
+      "grad_norm": 1.3227918665408323,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 17392
+    },
+    {
+      "epoch": 0.17393,
+      "grad_norm": 0.9623420610117401,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 17393
+    },
+    {
+      "epoch": 0.17394,
+      "grad_norm": 1.4037951528357682,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 17394
+    },
+    {
+      "epoch": 0.17395,
+      "grad_norm": 1.058862036609257,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 17395
+    },
+    {
+      "epoch": 0.17396,
+      "grad_norm": 1.5177044219698903,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 17396
+    },
+    {
+      "epoch": 0.17397,
+      "grad_norm": 1.0067233592822795,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 17397
+    },
+    {
+      "epoch": 0.17398,
+      "grad_norm": 1.440925491596526,
+      "learning_rate": 0.003,
+      "loss": 4.0528,
+      "step": 17398
+    },
+    {
+      "epoch": 0.17399,
+      "grad_norm": 1.1403915015967954,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 17399
+    },
+    {
+      "epoch": 0.174,
+      "grad_norm": 1.243404881400955,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 17400
+    },
+    {
+      "epoch": 0.17401,
+      "grad_norm": 1.4799671525618545,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 17401
+    },
+    {
+      "epoch": 0.17402,
+      "grad_norm": 1.2513926420361863,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 17402
+    },
+    {
+      "epoch": 0.17403,
+      "grad_norm": 1.4293955535980087,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 17403
+    },
+    {
+      "epoch": 0.17404,
+      "grad_norm": 1.0335425855082576,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 17404
+    },
+    {
+      "epoch": 0.17405,
+      "grad_norm": 1.1741411409646767,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 17405
+    },
+    {
+      "epoch": 0.17406,
+      "grad_norm": 1.3652937364359996,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 17406
+    },
+    {
+      "epoch": 0.17407,
+      "grad_norm": 1.2124474143939583,
+      "learning_rate": 0.003,
+      "loss": 3.9806,
+      "step": 17407
+    },
+    {
+      "epoch": 0.17408,
+      "grad_norm": 1.565002645615215,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 17408
+    },
+    {
+      "epoch": 0.17409,
+      "grad_norm": 1.239366136435791,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 17409
+    },
+    {
+      "epoch": 0.1741,
+      "grad_norm": 1.0812481404551977,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 17410
+    },
+    {
+      "epoch": 0.17411,
+      "grad_norm": 1.5086957512968893,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 17411
+    },
+    {
+      "epoch": 0.17412,
+      "grad_norm": 1.1652147415973524,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 17412
+    },
+    {
+      "epoch": 0.17413,
+      "grad_norm": 1.1927128455337468,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 17413
+    },
+    {
+      "epoch": 0.17414,
+      "grad_norm": 1.3528521306498578,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 17414
+    },
+    {
+      "epoch": 0.17415,
+      "grad_norm": 1.0960703425326428,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 17415
+    },
+    {
+      "epoch": 0.17416,
+      "grad_norm": 1.4602162168314847,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 17416
+    },
+    {
+      "epoch": 0.17417,
+      "grad_norm": 1.0368443511701424,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 17417
+    },
+    {
+      "epoch": 0.17418,
+      "grad_norm": 1.6099977197910769,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 17418
+    },
+    {
+      "epoch": 0.17419,
+      "grad_norm": 1.1972675032513773,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 17419
+    },
+    {
+      "epoch": 0.1742,
+      "grad_norm": 1.3184638228001029,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 17420
+    },
+    {
+      "epoch": 0.17421,
+      "grad_norm": 1.176368382451766,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 17421
+    },
+    {
+      "epoch": 0.17422,
+      "grad_norm": 1.093838392847663,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 17422
+    },
+    {
+      "epoch": 0.17423,
+      "grad_norm": 1.3771944160059182,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 17423
+    },
+    {
+      "epoch": 0.17424,
+      "grad_norm": 1.2426775725792576,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 17424
+    },
+    {
+      "epoch": 0.17425,
+      "grad_norm": 1.3163419783944523,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 17425
+    },
+    {
+      "epoch": 0.17426,
+      "grad_norm": 0.97312063332047,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 17426
+    },
+    {
+      "epoch": 0.17427,
+      "grad_norm": 1.463229691284514,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 17427
+    },
+    {
+      "epoch": 0.17428,
+      "grad_norm": 1.2014268675563655,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 17428
+    },
+    {
+      "epoch": 0.17429,
+      "grad_norm": 1.2928611370309657,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 17429
+    },
+    {
+      "epoch": 0.1743,
+      "grad_norm": 1.236739863097322,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 17430
+    },
+    {
+      "epoch": 0.17431,
+      "grad_norm": 1.2581378071175113,
+      "learning_rate": 0.003,
+      "loss": 3.9789,
+      "step": 17431
+    },
+    {
+      "epoch": 0.17432,
+      "grad_norm": 1.2825682371586564,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 17432
+    },
+    {
+      "epoch": 0.17433,
+      "grad_norm": 1.3083920982067194,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 17433
+    },
+    {
+      "epoch": 0.17434,
+      "grad_norm": 1.1835437320930857,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 17434
+    },
+    {
+      "epoch": 0.17435,
+      "grad_norm": 1.2615839587476694,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 17435
+    },
+    {
+      "epoch": 0.17436,
+      "grad_norm": 1.3765442849644418,
+      "learning_rate": 0.003,
+      "loss": 3.9779,
+      "step": 17436
+    },
+    {
+      "epoch": 0.17437,
+      "grad_norm": 1.0665678127620197,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 17437
+    },
+    {
+      "epoch": 0.17438,
+      "grad_norm": 1.6294484139660101,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 17438
+    },
+    {
+      "epoch": 0.17439,
+      "grad_norm": 1.2135317553416847,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 17439
+    },
+    {
+      "epoch": 0.1744,
+      "grad_norm": 1.3014420389996388,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 17440
+    },
+    {
+      "epoch": 0.17441,
+      "grad_norm": 1.1441838771751138,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 17441
+    },
+    {
+      "epoch": 0.17442,
+      "grad_norm": 1.5053710572281074,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 17442
+    },
+    {
+      "epoch": 0.17443,
+      "grad_norm": 0.9045722850562977,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 17443
+    },
+    {
+      "epoch": 0.17444,
+      "grad_norm": 1.154807886610162,
+      "learning_rate": 0.003,
+      "loss": 3.972,
+      "step": 17444
+    },
+    {
+      "epoch": 0.17445,
+      "grad_norm": 1.4581057868595215,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 17445
+    },
+    {
+      "epoch": 0.17446,
+      "grad_norm": 1.102586921173529,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 17446
+    },
+    {
+      "epoch": 0.17447,
+      "grad_norm": 1.7142103095973686,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 17447
+    },
+    {
+      "epoch": 0.17448,
+      "grad_norm": 1.0883226149353684,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 17448
+    },
+    {
+      "epoch": 0.17449,
+      "grad_norm": 1.439175568795504,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 17449
+    },
+    {
+      "epoch": 0.1745,
+      "grad_norm": 0.9894201968795465,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 17450
+    },
+    {
+      "epoch": 0.17451,
+      "grad_norm": 1.1319851110611714,
+      "learning_rate": 0.003,
+      "loss": 3.9811,
+      "step": 17451
+    },
+    {
+      "epoch": 0.17452,
+      "grad_norm": 1.4101783964755321,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 17452
+    },
+    {
+      "epoch": 0.17453,
+      "grad_norm": 1.1950631496714745,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 17453
+    },
+    {
+      "epoch": 0.17454,
+      "grad_norm": 1.2678178704996652,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 17454
+    },
+    {
+      "epoch": 0.17455,
+      "grad_norm": 1.29183275761241,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 17455
+    },
+    {
+      "epoch": 0.17456,
+      "grad_norm": 1.3880097914219025,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 17456
+    },
+    {
+      "epoch": 0.17457,
+      "grad_norm": 1.4516062258836981,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 17457
+    },
+    {
+      "epoch": 0.17458,
+      "grad_norm": 1.0864160692418074,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 17458
+    },
+    {
+      "epoch": 0.17459,
+      "grad_norm": 1.1726149063068008,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 17459
+    },
+    {
+      "epoch": 0.1746,
+      "grad_norm": 1.0890544630370935,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 17460
+    },
+    {
+      "epoch": 0.17461,
+      "grad_norm": 1.3667948040505313,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 17461
+    },
+    {
+      "epoch": 0.17462,
+      "grad_norm": 1.1645584883877051,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 17462
+    },
+    {
+      "epoch": 0.17463,
+      "grad_norm": 1.503741330222054,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 17463
+    },
+    {
+      "epoch": 0.17464,
+      "grad_norm": 1.0489494837596345,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 17464
+    },
+    {
+      "epoch": 0.17465,
+      "grad_norm": 1.4163168678260742,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 17465
+    },
+    {
+      "epoch": 0.17466,
+      "grad_norm": 1.0905468833071308,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 17466
+    },
+    {
+      "epoch": 0.17467,
+      "grad_norm": 1.561498753185741,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 17467
+    },
+    {
+      "epoch": 0.17468,
+      "grad_norm": 1.2436964093941147,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 17468
+    },
+    {
+      "epoch": 0.17469,
+      "grad_norm": 1.3442079014440735,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 17469
+    },
+    {
+      "epoch": 0.1747,
+      "grad_norm": 1.3412844922898968,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 17470
+    },
+    {
+      "epoch": 0.17471,
+      "grad_norm": 1.5212025627883503,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 17471
+    },
+    {
+      "epoch": 0.17472,
+      "grad_norm": 1.1359504116512613,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 17472
+    },
+    {
+      "epoch": 0.17473,
+      "grad_norm": 1.189587217761058,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 17473
+    },
+    {
+      "epoch": 0.17474,
+      "grad_norm": 1.1467561240598758,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 17474
+    },
+    {
+      "epoch": 0.17475,
+      "grad_norm": 1.2190623306021742,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 17475
+    },
+    {
+      "epoch": 0.17476,
+      "grad_norm": 1.2077612593166749,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 17476
+    },
+    {
+      "epoch": 0.17477,
+      "grad_norm": 1.2805042241733537,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 17477
+    },
+    {
+      "epoch": 0.17478,
+      "grad_norm": 1.1128725501592887,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 17478
+    },
+    {
+      "epoch": 0.17479,
+      "grad_norm": 1.24886952802986,
+      "learning_rate": 0.003,
+      "loss": 3.9938,
+      "step": 17479
+    },
+    {
+      "epoch": 0.1748,
+      "grad_norm": 1.2882811283564244,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 17480
+    },
+    {
+      "epoch": 0.17481,
+      "grad_norm": 1.3820476542413211,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 17481
+    },
+    {
+      "epoch": 0.17482,
+      "grad_norm": 1.4295014325350368,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 17482
+    },
+    {
+      "epoch": 0.17483,
+      "grad_norm": 1.239118088638119,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 17483
+    },
+    {
+      "epoch": 0.17484,
+      "grad_norm": 1.2489244256582073,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 17484
+    },
+    {
+      "epoch": 0.17485,
+      "grad_norm": 1.2527953693531593,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 17485
+    },
+    {
+      "epoch": 0.17486,
+      "grad_norm": 1.2442497188525592,
+      "learning_rate": 0.003,
+      "loss": 4.0384,
+      "step": 17486
+    },
+    {
+      "epoch": 0.17487,
+      "grad_norm": 1.2567200106459853,
+      "learning_rate": 0.003,
+      "loss": 3.9844,
+      "step": 17487
+    },
+    {
+      "epoch": 0.17488,
+      "grad_norm": 1.3224776601648938,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 17488
+    },
+    {
+      "epoch": 0.17489,
+      "grad_norm": 1.0544687569406892,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 17489
+    },
+    {
+      "epoch": 0.1749,
+      "grad_norm": 1.3644368776681581,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 17490
+    },
+    {
+      "epoch": 0.17491,
+      "grad_norm": 1.170964249779947,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 17491
+    },
+    {
+      "epoch": 0.17492,
+      "grad_norm": 1.570199409081135,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 17492
+    },
+    {
+      "epoch": 0.17493,
+      "grad_norm": 1.1791374142942461,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 17493
+    },
+    {
+      "epoch": 0.17494,
+      "grad_norm": 1.4209200136606746,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 17494
+    },
+    {
+      "epoch": 0.17495,
+      "grad_norm": 1.2564179478971467,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 17495
+    },
+    {
+      "epoch": 0.17496,
+      "grad_norm": 1.3155050931655181,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 17496
+    },
+    {
+      "epoch": 0.17497,
+      "grad_norm": 1.328719821056356,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 17497
+    },
+    {
+      "epoch": 0.17498,
+      "grad_norm": 1.4774943449101552,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 17498
+    },
+    {
+      "epoch": 0.17499,
+      "grad_norm": 1.2361239694490804,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 17499
+    },
+    {
+      "epoch": 0.175,
+      "grad_norm": 1.26961012920436,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 17500
+    },
+    {
+      "epoch": 0.17501,
+      "grad_norm": 1.168143621476018,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 17501
+    },
+    {
+      "epoch": 0.17502,
+      "grad_norm": 1.371389114647039,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 17502
+    },
+    {
+      "epoch": 0.17503,
+      "grad_norm": 1.21480838583015,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 17503
+    },
+    {
+      "epoch": 0.17504,
+      "grad_norm": 1.174257813679406,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 17504
+    },
+    {
+      "epoch": 0.17505,
+      "grad_norm": 1.290775859106054,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 17505
+    },
+    {
+      "epoch": 0.17506,
+      "grad_norm": 1.3943836445178055,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 17506
+    },
+    {
+      "epoch": 0.17507,
+      "grad_norm": 1.1928197149111355,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 17507
+    },
+    {
+      "epoch": 0.17508,
+      "grad_norm": 1.3179143177195527,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 17508
+    },
+    {
+      "epoch": 0.17509,
+      "grad_norm": 1.106568451592994,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 17509
+    },
+    {
+      "epoch": 0.1751,
+      "grad_norm": 1.2945594796110989,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 17510
+    },
+    {
+      "epoch": 0.17511,
+      "grad_norm": 1.1046731165335164,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 17511
+    },
+    {
+      "epoch": 0.17512,
+      "grad_norm": 1.452680110654347,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 17512
+    },
+    {
+      "epoch": 0.17513,
+      "grad_norm": 1.2021451899954632,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 17513
+    },
+    {
+      "epoch": 0.17514,
+      "grad_norm": 1.2475477973757747,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 17514
+    },
+    {
+      "epoch": 0.17515,
+      "grad_norm": 1.26448299868792,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 17515
+    },
+    {
+      "epoch": 0.17516,
+      "grad_norm": 1.346066366740584,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 17516
+    },
+    {
+      "epoch": 0.17517,
+      "grad_norm": 1.2380737111410907,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 17517
+    },
+    {
+      "epoch": 0.17518,
+      "grad_norm": 1.2012239141263896,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 17518
+    },
+    {
+      "epoch": 0.17519,
+      "grad_norm": 1.2286300179862844,
+      "learning_rate": 0.003,
+      "loss": 3.9763,
+      "step": 17519
+    },
+    {
+      "epoch": 0.1752,
+      "grad_norm": 1.1285470362368863,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 17520
+    },
+    {
+      "epoch": 0.17521,
+      "grad_norm": 1.4469269417899426,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 17521
+    },
+    {
+      "epoch": 0.17522,
+      "grad_norm": 0.9874807206339904,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 17522
+    },
+    {
+      "epoch": 0.17523,
+      "grad_norm": 1.4122407383518634,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 17523
+    },
+    {
+      "epoch": 0.17524,
+      "grad_norm": 1.0231215239865685,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 17524
+    },
+    {
+      "epoch": 0.17525,
+      "grad_norm": 1.390824344503427,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 17525
+    },
+    {
+      "epoch": 0.17526,
+      "grad_norm": 1.2058258237116968,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 17526
+    },
+    {
+      "epoch": 0.17527,
+      "grad_norm": 1.4580280986425835,
+      "learning_rate": 0.003,
+      "loss": 3.9864,
+      "step": 17527
+    },
+    {
+      "epoch": 0.17528,
+      "grad_norm": 1.0776378794974155,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 17528
+    },
+    {
+      "epoch": 0.17529,
+      "grad_norm": 1.3549429661530912,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 17529
+    },
+    {
+      "epoch": 0.1753,
+      "grad_norm": 1.0128705323649436,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 17530
+    },
+    {
+      "epoch": 0.17531,
+      "grad_norm": 1.5774919045495472,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 17531
+    },
+    {
+      "epoch": 0.17532,
+      "grad_norm": 1.0305305958749693,
+      "learning_rate": 0.003,
+      "loss": 3.9622,
+      "step": 17532
+    },
+    {
+      "epoch": 0.17533,
+      "grad_norm": 1.6209730948269785,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 17533
+    },
+    {
+      "epoch": 0.17534,
+      "grad_norm": 1.1751955702817969,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 17534
+    },
+    {
+      "epoch": 0.17535,
+      "grad_norm": 1.1649296906823114,
+      "learning_rate": 0.003,
+      "loss": 3.9735,
+      "step": 17535
+    },
+    {
+      "epoch": 0.17536,
+      "grad_norm": 1.2174031909483718,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 17536
+    },
+    {
+      "epoch": 0.17537,
+      "grad_norm": 1.185420170654492,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 17537
+    },
+    {
+      "epoch": 0.17538,
+      "grad_norm": 1.3201442908408842,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 17538
+    },
+    {
+      "epoch": 0.17539,
+      "grad_norm": 1.3588723981014663,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 17539
+    },
+    {
+      "epoch": 0.1754,
+      "grad_norm": 1.4270005102548036,
+      "learning_rate": 0.003,
+      "loss": 4.0483,
+      "step": 17540
+    },
+    {
+      "epoch": 0.17541,
+      "grad_norm": 1.1309033485992048,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 17541
+    },
+    {
+      "epoch": 0.17542,
+      "grad_norm": 1.1601972477484055,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 17542
+    },
+    {
+      "epoch": 0.17543,
+      "grad_norm": 1.2719466286625902,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 17543
+    },
+    {
+      "epoch": 0.17544,
+      "grad_norm": 1.2934184007525624,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 17544
+    },
+    {
+      "epoch": 0.17545,
+      "grad_norm": 1.1443382594276104,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 17545
+    },
+    {
+      "epoch": 0.17546,
+      "grad_norm": 1.484698575855083,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 17546
+    },
+    {
+      "epoch": 0.17547,
+      "grad_norm": 1.443084888027566,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 17547
+    },
+    {
+      "epoch": 0.17548,
+      "grad_norm": 1.3578247263605012,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 17548
+    },
+    {
+      "epoch": 0.17549,
+      "grad_norm": 1.1222740359797407,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 17549
+    },
+    {
+      "epoch": 0.1755,
+      "grad_norm": 1.2217045827974942,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 17550
+    },
+    {
+      "epoch": 0.17551,
+      "grad_norm": 1.2599905587835294,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 17551
+    },
+    {
+      "epoch": 0.17552,
+      "grad_norm": 1.3173881636461955,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 17552
+    },
+    {
+      "epoch": 0.17553,
+      "grad_norm": 1.1985283675532807,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 17553
+    },
+    {
+      "epoch": 0.17554,
+      "grad_norm": 1.3535779532008505,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 17554
+    },
+    {
+      "epoch": 0.17555,
+      "grad_norm": 1.1593943577563153,
+      "learning_rate": 0.003,
+      "loss": 3.9748,
+      "step": 17555
+    },
+    {
+      "epoch": 0.17556,
+      "grad_norm": 1.2212030190780252,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 17556
+    },
+    {
+      "epoch": 0.17557,
+      "grad_norm": 1.1953193688900052,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 17557
+    },
+    {
+      "epoch": 0.17558,
+      "grad_norm": 1.284028278474116,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 17558
+    },
+    {
+      "epoch": 0.17559,
+      "grad_norm": 1.3281165223541755,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 17559
+    },
+    {
+      "epoch": 0.1756,
+      "grad_norm": 0.9785727433834605,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 17560
+    },
+    {
+      "epoch": 0.17561,
+      "grad_norm": 1.2432156776124126,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 17561
+    },
+    {
+      "epoch": 0.17562,
+      "grad_norm": 1.3246982368270601,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 17562
+    },
+    {
+      "epoch": 0.17563,
+      "grad_norm": 1.249921604351906,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 17563
+    },
+    {
+      "epoch": 0.17564,
+      "grad_norm": 1.2465591420404474,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 17564
+    },
+    {
+      "epoch": 0.17565,
+      "grad_norm": 1.2093675545638525,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 17565
+    },
+    {
+      "epoch": 0.17566,
+      "grad_norm": 1.421791354142211,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 17566
+    },
+    {
+      "epoch": 0.17567,
+      "grad_norm": 1.237781468795189,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 17567
+    },
+    {
+      "epoch": 0.17568,
+      "grad_norm": 1.0586259261472677,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 17568
+    },
+    {
+      "epoch": 0.17569,
+      "grad_norm": 1.3867036041741054,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 17569
+    },
+    {
+      "epoch": 0.1757,
+      "grad_norm": 1.3645783708200951,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 17570
+    },
+    {
+      "epoch": 0.17571,
+      "grad_norm": 1.362526636972413,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 17571
+    },
+    {
+      "epoch": 0.17572,
+      "grad_norm": 1.169807271156517,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 17572
+    },
+    {
+      "epoch": 0.17573,
+      "grad_norm": 1.2493866528248445,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 17573
+    },
+    {
+      "epoch": 0.17574,
+      "grad_norm": 1.2984770898637006,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 17574
+    },
+    {
+      "epoch": 0.17575,
+      "grad_norm": 1.0356437941386805,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 17575
+    },
+    {
+      "epoch": 0.17576,
+      "grad_norm": 1.4871383560855997,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 17576
+    },
+    {
+      "epoch": 0.17577,
+      "grad_norm": 1.150557329959256,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 17577
+    },
+    {
+      "epoch": 0.17578,
+      "grad_norm": 1.4133663757588073,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 17578
+    },
+    {
+      "epoch": 0.17579,
+      "grad_norm": 1.2581433405946318,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 17579
+    },
+    {
+      "epoch": 0.1758,
+      "grad_norm": 1.4213042433507315,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 17580
+    },
+    {
+      "epoch": 0.17581,
+      "grad_norm": 1.2669235513462596,
+      "learning_rate": 0.003,
+      "loss": 4.0445,
+      "step": 17581
+    },
+    {
+      "epoch": 0.17582,
+      "grad_norm": 1.366659090525752,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 17582
+    },
+    {
+      "epoch": 0.17583,
+      "grad_norm": 1.5443151445880576,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 17583
+    },
+    {
+      "epoch": 0.17584,
+      "grad_norm": 1.187717678251351,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 17584
+    },
+    {
+      "epoch": 0.17585,
+      "grad_norm": 1.566622961243868,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 17585
+    },
+    {
+      "epoch": 0.17586,
+      "grad_norm": 1.235649476915203,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 17586
+    },
+    {
+      "epoch": 0.17587,
+      "grad_norm": 1.379215757629583,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 17587
+    },
+    {
+      "epoch": 0.17588,
+      "grad_norm": 1.0614341038953146,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 17588
+    },
+    {
+      "epoch": 0.17589,
+      "grad_norm": 1.2648484455968334,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 17589
+    },
+    {
+      "epoch": 0.1759,
+      "grad_norm": 1.1576910924641934,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 17590
+    },
+    {
+      "epoch": 0.17591,
+      "grad_norm": 1.1431254154227177,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 17591
+    },
+    {
+      "epoch": 0.17592,
+      "grad_norm": 1.3161147509011362,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 17592
+    },
+    {
+      "epoch": 0.17593,
+      "grad_norm": 1.2696360298337392,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 17593
+    },
+    {
+      "epoch": 0.17594,
+      "grad_norm": 1.4224208341519127,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 17594
+    },
+    {
+      "epoch": 0.17595,
+      "grad_norm": 0.99950016326709,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 17595
+    },
+    {
+      "epoch": 0.17596,
+      "grad_norm": 1.5338785658460057,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 17596
+    },
+    {
+      "epoch": 0.17597,
+      "grad_norm": 1.0377250441976649,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 17597
+    },
+    {
+      "epoch": 0.17598,
+      "grad_norm": 1.6791282982424311,
+      "learning_rate": 0.003,
+      "loss": 4.0544,
+      "step": 17598
+    },
+    {
+      "epoch": 0.17599,
+      "grad_norm": 1.0560648540651831,
+      "learning_rate": 0.003,
+      "loss": 4.0499,
+      "step": 17599
+    },
+    {
+      "epoch": 0.176,
+      "grad_norm": 1.2152626989221356,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 17600
+    },
+    {
+      "epoch": 0.17601,
+      "grad_norm": 1.3408770196110984,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 17601
+    },
+    {
+      "epoch": 0.17602,
+      "grad_norm": 1.135109841606531,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 17602
+    },
+    {
+      "epoch": 0.17603,
+      "grad_norm": 1.243107289474913,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 17603
+    },
+    {
+      "epoch": 0.17604,
+      "grad_norm": 1.2852511433011993,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 17604
+    },
+    {
+      "epoch": 0.17605,
+      "grad_norm": 0.956126693243154,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 17605
+    },
+    {
+      "epoch": 0.17606,
+      "grad_norm": 1.635243932239004,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 17606
+    },
+    {
+      "epoch": 0.17607,
+      "grad_norm": 1.1330955529107827,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 17607
+    },
+    {
+      "epoch": 0.17608,
+      "grad_norm": 1.5430115729514313,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 17608
+    },
+    {
+      "epoch": 0.17609,
+      "grad_norm": 1.08412975084303,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 17609
+    },
+    {
+      "epoch": 0.1761,
+      "grad_norm": 1.377042344730517,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 17610
+    },
+    {
+      "epoch": 0.17611,
+      "grad_norm": 1.1638370823196682,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 17611
+    },
+    {
+      "epoch": 0.17612,
+      "grad_norm": 1.493808977247851,
+      "learning_rate": 0.003,
+      "loss": 3.9594,
+      "step": 17612
+    },
+    {
+      "epoch": 0.17613,
+      "grad_norm": 1.3228465169531465,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 17613
+    },
+    {
+      "epoch": 0.17614,
+      "grad_norm": 1.3087121810410431,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 17614
+    },
+    {
+      "epoch": 0.17615,
+      "grad_norm": 1.2068750677649611,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 17615
+    },
+    {
+      "epoch": 0.17616,
+      "grad_norm": 1.0696833716922531,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 17616
+    },
+    {
+      "epoch": 0.17617,
+      "grad_norm": 1.2759221671837773,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 17617
+    },
+    {
+      "epoch": 0.17618,
+      "grad_norm": 1.2043041905130851,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 17618
+    },
+    {
+      "epoch": 0.17619,
+      "grad_norm": 1.4037105214642187,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 17619
+    },
+    {
+      "epoch": 0.1762,
+      "grad_norm": 1.2242585181629013,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 17620
+    },
+    {
+      "epoch": 0.17621,
+      "grad_norm": 1.2206287301130856,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 17621
+    },
+    {
+      "epoch": 0.17622,
+      "grad_norm": 1.2835961455791998,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 17622
+    },
+    {
+      "epoch": 0.17623,
+      "grad_norm": 1.2070454470336767,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 17623
+    },
+    {
+      "epoch": 0.17624,
+      "grad_norm": 1.313277425758526,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 17624
+    },
+    {
+      "epoch": 0.17625,
+      "grad_norm": 1.2724185930197756,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 17625
+    },
+    {
+      "epoch": 0.17626,
+      "grad_norm": 1.1092201178149645,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 17626
+    },
+    {
+      "epoch": 0.17627,
+      "grad_norm": 1.402151390353694,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 17627
+    },
+    {
+      "epoch": 0.17628,
+      "grad_norm": 1.1428631040156145,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 17628
+    },
+    {
+      "epoch": 0.17629,
+      "grad_norm": 1.2710406949237625,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 17629
+    },
+    {
+      "epoch": 0.1763,
+      "grad_norm": 1.2241399705227562,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 17630
+    },
+    {
+      "epoch": 0.17631,
+      "grad_norm": 1.2815206513869735,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 17631
+    },
+    {
+      "epoch": 0.17632,
+      "grad_norm": 1.1123932464141089,
+      "learning_rate": 0.003,
+      "loss": 4.0591,
+      "step": 17632
+    },
+    {
+      "epoch": 0.17633,
+      "grad_norm": 1.3194960667717541,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 17633
+    },
+    {
+      "epoch": 0.17634,
+      "grad_norm": 1.1513525106952893,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 17634
+    },
+    {
+      "epoch": 0.17635,
+      "grad_norm": 1.508444451922507,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 17635
+    },
+    {
+      "epoch": 0.17636,
+      "grad_norm": 1.2828155918425586,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 17636
+    },
+    {
+      "epoch": 0.17637,
+      "grad_norm": 1.275723512265431,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 17637
+    },
+    {
+      "epoch": 0.17638,
+      "grad_norm": 1.3655858171298656,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 17638
+    },
+    {
+      "epoch": 0.17639,
+      "grad_norm": 1.2186927399922851,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 17639
+    },
+    {
+      "epoch": 0.1764,
+      "grad_norm": 1.3689522848466271,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 17640
+    },
+    {
+      "epoch": 0.17641,
+      "grad_norm": 1.4312717569589986,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 17641
+    },
+    {
+      "epoch": 0.17642,
+      "grad_norm": 1.1798880942001364,
+      "learning_rate": 0.003,
+      "loss": 3.9672,
+      "step": 17642
+    },
+    {
+      "epoch": 0.17643,
+      "grad_norm": 1.5216060480329352,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 17643
+    },
+    {
+      "epoch": 0.17644,
+      "grad_norm": 1.0725669386174144,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 17644
+    },
+    {
+      "epoch": 0.17645,
+      "grad_norm": 1.2059907916524846,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 17645
+    },
+    {
+      "epoch": 0.17646,
+      "grad_norm": 1.089964308292804,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 17646
+    },
+    {
+      "epoch": 0.17647,
+      "grad_norm": 1.2652122674975332,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 17647
+    },
+    {
+      "epoch": 0.17648,
+      "grad_norm": 1.0873367578770192,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 17648
+    },
+    {
+      "epoch": 0.17649,
+      "grad_norm": 1.350335271643384,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 17649
+    },
+    {
+      "epoch": 0.1765,
+      "grad_norm": 1.1466414457032206,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 17650
+    },
+    {
+      "epoch": 0.17651,
+      "grad_norm": 1.6318275207641635,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 17651
+    },
+    {
+      "epoch": 0.17652,
+      "grad_norm": 1.0730130344316913,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 17652
+    },
+    {
+      "epoch": 0.17653,
+      "grad_norm": 1.4448135906604729,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 17653
+    },
+    {
+      "epoch": 0.17654,
+      "grad_norm": 0.995886658610207,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 17654
+    },
+    {
+      "epoch": 0.17655,
+      "grad_norm": 1.2610663984889068,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 17655
+    },
+    {
+      "epoch": 0.17656,
+      "grad_norm": 1.1872689563076624,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 17656
+    },
+    {
+      "epoch": 0.17657,
+      "grad_norm": 1.3197271867164788,
+      "learning_rate": 0.003,
+      "loss": 3.9744,
+      "step": 17657
+    },
+    {
+      "epoch": 0.17658,
+      "grad_norm": 1.1312799689004187,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 17658
+    },
+    {
+      "epoch": 0.17659,
+      "grad_norm": 1.4613108130626726,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 17659
+    },
+    {
+      "epoch": 0.1766,
+      "grad_norm": 1.215305872947806,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 17660
+    },
+    {
+      "epoch": 0.17661,
+      "grad_norm": 1.3428648707045394,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 17661
+    },
+    {
+      "epoch": 0.17662,
+      "grad_norm": 1.0548112155389986,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 17662
+    },
+    {
+      "epoch": 0.17663,
+      "grad_norm": 1.38585228216765,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 17663
+    },
+    {
+      "epoch": 0.17664,
+      "grad_norm": 1.1616798399752524,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 17664
+    },
+    {
+      "epoch": 0.17665,
+      "grad_norm": 1.5132186238889027,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 17665
+    },
+    {
+      "epoch": 0.17666,
+      "grad_norm": 1.2126955483808322,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 17666
+    },
+    {
+      "epoch": 0.17667,
+      "grad_norm": 1.1756236455801312,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 17667
+    },
+    {
+      "epoch": 0.17668,
+      "grad_norm": 1.1492162473406433,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 17668
+    },
+    {
+      "epoch": 0.17669,
+      "grad_norm": 1.1566973144214787,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 17669
+    },
+    {
+      "epoch": 0.1767,
+      "grad_norm": 1.506338186573076,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 17670
+    },
+    {
+      "epoch": 0.17671,
+      "grad_norm": 1.3894656365989166,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 17671
+    },
+    {
+      "epoch": 0.17672,
+      "grad_norm": 1.3466288883358835,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 17672
+    },
+    {
+      "epoch": 0.17673,
+      "grad_norm": 1.186810242645646,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 17673
+    },
+    {
+      "epoch": 0.17674,
+      "grad_norm": 1.275311566356402,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 17674
+    },
+    {
+      "epoch": 0.17675,
+      "grad_norm": 1.2940798901031692,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 17675
+    },
+    {
+      "epoch": 0.17676,
+      "grad_norm": 1.3305715960327655,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 17676
+    },
+    {
+      "epoch": 0.17677,
+      "grad_norm": 1.1855274683196175,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 17677
+    },
+    {
+      "epoch": 0.17678,
+      "grad_norm": 1.5154701258114815,
+      "learning_rate": 0.003,
+      "loss": 4.0444,
+      "step": 17678
+    },
+    {
+      "epoch": 0.17679,
+      "grad_norm": 1.091549182112842,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 17679
+    },
+    {
+      "epoch": 0.1768,
+      "grad_norm": 1.2864130044198197,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 17680
+    },
+    {
+      "epoch": 0.17681,
+      "grad_norm": 1.16301276367685,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 17681
+    },
+    {
+      "epoch": 0.17682,
+      "grad_norm": 1.4227728662833272,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 17682
+    },
+    {
+      "epoch": 0.17683,
+      "grad_norm": 1.1344882161162675,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 17683
+    },
+    {
+      "epoch": 0.17684,
+      "grad_norm": 1.4008816348134472,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 17684
+    },
+    {
+      "epoch": 0.17685,
+      "grad_norm": 1.231251621514772,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 17685
+    },
+    {
+      "epoch": 0.17686,
+      "grad_norm": 1.2301141486401743,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 17686
+    },
+    {
+      "epoch": 0.17687,
+      "grad_norm": 1.1821536828911614,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 17687
+    },
+    {
+      "epoch": 0.17688,
+      "grad_norm": 1.6340806933485859,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 17688
+    },
+    {
+      "epoch": 0.17689,
+      "grad_norm": 1.291422498596477,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 17689
+    },
+    {
+      "epoch": 0.1769,
+      "grad_norm": 1.370574438743369,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 17690
+    },
+    {
+      "epoch": 0.17691,
+      "grad_norm": 1.2254311160033806,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 17691
+    },
+    {
+      "epoch": 0.17692,
+      "grad_norm": 1.2974026858514667,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 17692
+    },
+    {
+      "epoch": 0.17693,
+      "grad_norm": 1.272721143710752,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 17693
+    },
+    {
+      "epoch": 0.17694,
+      "grad_norm": 1.1896694512983272,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 17694
+    },
+    {
+      "epoch": 0.17695,
+      "grad_norm": 1.2918362518647468,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 17695
+    },
+    {
+      "epoch": 0.17696,
+      "grad_norm": 1.2716963200505105,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 17696
+    },
+    {
+      "epoch": 0.17697,
+      "grad_norm": 1.4537290270714847,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 17697
+    },
+    {
+      "epoch": 0.17698,
+      "grad_norm": 1.1057124277034496,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 17698
+    },
+    {
+      "epoch": 0.17699,
+      "grad_norm": 1.422077767087793,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 17699
+    },
+    {
+      "epoch": 0.177,
+      "grad_norm": 0.9764022016301912,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 17700
+    },
+    {
+      "epoch": 0.17701,
+      "grad_norm": 1.3305469891418082,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 17701
+    },
+    {
+      "epoch": 0.17702,
+      "grad_norm": 1.0302701942137433,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 17702
+    },
+    {
+      "epoch": 0.17703,
+      "grad_norm": 1.4584854604802886,
+      "learning_rate": 0.003,
+      "loss": 3.9772,
+      "step": 17703
+    },
+    {
+      "epoch": 0.17704,
+      "grad_norm": 1.1897371995285562,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 17704
+    },
+    {
+      "epoch": 0.17705,
+      "grad_norm": 1.3886917458309411,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 17705
+    },
+    {
+      "epoch": 0.17706,
+      "grad_norm": 1.1999671477506535,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 17706
+    },
+    {
+      "epoch": 0.17707,
+      "grad_norm": 1.2901270427968135,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 17707
+    },
+    {
+      "epoch": 0.17708,
+      "grad_norm": 1.2304546999945096,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 17708
+    },
+    {
+      "epoch": 0.17709,
+      "grad_norm": 1.2430163275046657,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 17709
+    },
+    {
+      "epoch": 0.1771,
+      "grad_norm": 1.2656647666393261,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 17710
+    },
+    {
+      "epoch": 0.17711,
+      "grad_norm": 1.3360788684996352,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 17711
+    },
+    {
+      "epoch": 0.17712,
+      "grad_norm": 1.2041746388333983,
+      "learning_rate": 0.003,
+      "loss": 3.9735,
+      "step": 17712
+    },
+    {
+      "epoch": 0.17713,
+      "grad_norm": 1.280418735699287,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 17713
+    },
+    {
+      "epoch": 0.17714,
+      "grad_norm": 1.075661513736821,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 17714
+    },
+    {
+      "epoch": 0.17715,
+      "grad_norm": 1.4788091330140924,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 17715
+    },
+    {
+      "epoch": 0.17716,
+      "grad_norm": 1.2800608738801205,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 17716
+    },
+    {
+      "epoch": 0.17717,
+      "grad_norm": 1.287082989265906,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 17717
+    },
+    {
+      "epoch": 0.17718,
+      "grad_norm": 1.1821351252449808,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 17718
+    },
+    {
+      "epoch": 0.17719,
+      "grad_norm": 1.4291892459728956,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 17719
+    },
+    {
+      "epoch": 0.1772,
+      "grad_norm": 1.0418877738975607,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 17720
+    },
+    {
+      "epoch": 0.17721,
+      "grad_norm": 1.5396247215856047,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 17721
+    },
+    {
+      "epoch": 0.17722,
+      "grad_norm": 1.2578079509816884,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 17722
+    },
+    {
+      "epoch": 0.17723,
+      "grad_norm": 1.5959123237774604,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 17723
+    },
+    {
+      "epoch": 0.17724,
+      "grad_norm": 1.2700442301751993,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 17724
+    },
+    {
+      "epoch": 0.17725,
+      "grad_norm": 1.2688808066870647,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 17725
+    },
+    {
+      "epoch": 0.17726,
+      "grad_norm": 1.411334765064551,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 17726
+    },
+    {
+      "epoch": 0.17727,
+      "grad_norm": 1.3186517008825351,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 17727
+    },
+    {
+      "epoch": 0.17728,
+      "grad_norm": 1.209822516658508,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 17728
+    },
+    {
+      "epoch": 0.17729,
+      "grad_norm": 1.4961920570514124,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 17729
+    },
+    {
+      "epoch": 0.1773,
+      "grad_norm": 1.27328898760038,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 17730
+    },
+    {
+      "epoch": 0.17731,
+      "grad_norm": 1.2122259626114444,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 17731
+    },
+    {
+      "epoch": 0.17732,
+      "grad_norm": 1.2777247490895498,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 17732
+    },
+    {
+      "epoch": 0.17733,
+      "grad_norm": 1.1962870920111894,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 17733
+    },
+    {
+      "epoch": 0.17734,
+      "grad_norm": 1.3076328325875022,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 17734
+    },
+    {
+      "epoch": 0.17735,
+      "grad_norm": 1.2468178873207434,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 17735
+    },
+    {
+      "epoch": 0.17736,
+      "grad_norm": 1.4154689015219308,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 17736
+    },
+    {
+      "epoch": 0.17737,
+      "grad_norm": 1.1167695404121292,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 17737
+    },
+    {
+      "epoch": 0.17738,
+      "grad_norm": 1.4217078305436055,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 17738
+    },
+    {
+      "epoch": 0.17739,
+      "grad_norm": 1.133246619258175,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 17739
+    },
+    {
+      "epoch": 0.1774,
+      "grad_norm": 1.2347033258411095,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 17740
+    },
+    {
+      "epoch": 0.17741,
+      "grad_norm": 1.1348099403562892,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 17741
+    },
+    {
+      "epoch": 0.17742,
+      "grad_norm": 1.3785597373138432,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 17742
+    },
+    {
+      "epoch": 0.17743,
+      "grad_norm": 1.2690463645671801,
+      "learning_rate": 0.003,
+      "loss": 4.0546,
+      "step": 17743
+    },
+    {
+      "epoch": 0.17744,
+      "grad_norm": 1.3338291965253706,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 17744
+    },
+    {
+      "epoch": 0.17745,
+      "grad_norm": 1.12328921255648,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 17745
+    },
+    {
+      "epoch": 0.17746,
+      "grad_norm": 1.5091144446382325,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 17746
+    },
+    {
+      "epoch": 0.17747,
+      "grad_norm": 1.1177715836119348,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 17747
+    },
+    {
+      "epoch": 0.17748,
+      "grad_norm": 1.3387225070034643,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 17748
+    },
+    {
+      "epoch": 0.17749,
+      "grad_norm": 1.233881653208535,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 17749
+    },
+    {
+      "epoch": 0.1775,
+      "grad_norm": 1.2206411609276537,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 17750
+    },
+    {
+      "epoch": 0.17751,
+      "grad_norm": 1.2803136770513195,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 17751
+    },
+    {
+      "epoch": 0.17752,
+      "grad_norm": 1.281486086260054,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 17752
+    },
+    {
+      "epoch": 0.17753,
+      "grad_norm": 1.1452833113701208,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 17753
+    },
+    {
+      "epoch": 0.17754,
+      "grad_norm": 1.3107700497422121,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 17754
+    },
+    {
+      "epoch": 0.17755,
+      "grad_norm": 1.2631770254982326,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 17755
+    },
+    {
+      "epoch": 0.17756,
+      "grad_norm": 1.4179454778251528,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 17756
+    },
+    {
+      "epoch": 0.17757,
+      "grad_norm": 1.209455850454635,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 17757
+    },
+    {
+      "epoch": 0.17758,
+      "grad_norm": 1.2430503248091411,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 17758
+    },
+    {
+      "epoch": 0.17759,
+      "grad_norm": 1.2258695046015262,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 17759
+    },
+    {
+      "epoch": 0.1776,
+      "grad_norm": 1.3720321833058833,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 17760
+    },
+    {
+      "epoch": 0.17761,
+      "grad_norm": 1.1157879586813826,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 17761
+    },
+    {
+      "epoch": 0.17762,
+      "grad_norm": 1.379835831092119,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 17762
+    },
+    {
+      "epoch": 0.17763,
+      "grad_norm": 1.074832416938527,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 17763
+    },
+    {
+      "epoch": 0.17764,
+      "grad_norm": 1.4779325456764498,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 17764
+    },
+    {
+      "epoch": 0.17765,
+      "grad_norm": 1.0737916991954493,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 17765
+    },
+    {
+      "epoch": 0.17766,
+      "grad_norm": 1.144441928094557,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 17766
+    },
+    {
+      "epoch": 0.17767,
+      "grad_norm": 1.3573965743376548,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 17767
+    },
+    {
+      "epoch": 0.17768,
+      "grad_norm": 1.4348911192679688,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 17768
+    },
+    {
+      "epoch": 0.17769,
+      "grad_norm": 1.288248592955297,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 17769
+    },
+    {
+      "epoch": 0.1777,
+      "grad_norm": 1.3009077629177082,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 17770
+    },
+    {
+      "epoch": 0.17771,
+      "grad_norm": 1.1001977329301142,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 17771
+    },
+    {
+      "epoch": 0.17772,
+      "grad_norm": 1.3781231544392432,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 17772
+    },
+    {
+      "epoch": 0.17773,
+      "grad_norm": 1.2070000918672394,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 17773
+    },
+    {
+      "epoch": 0.17774,
+      "grad_norm": 1.3584805334694108,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 17774
+    },
+    {
+      "epoch": 0.17775,
+      "grad_norm": 1.3257674589255668,
+      "learning_rate": 0.003,
+      "loss": 3.9769,
+      "step": 17775
+    },
+    {
+      "epoch": 0.17776,
+      "grad_norm": 1.1440316911369965,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 17776
+    },
+    {
+      "epoch": 0.17777,
+      "grad_norm": 1.2951993538101918,
+      "learning_rate": 0.003,
+      "loss": 3.9875,
+      "step": 17777
+    },
+    {
+      "epoch": 0.17778,
+      "grad_norm": 1.219670726338208,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 17778
+    },
+    {
+      "epoch": 0.17779,
+      "grad_norm": 1.2576854830100939,
+      "learning_rate": 0.003,
+      "loss": 3.9877,
+      "step": 17779
+    },
+    {
+      "epoch": 0.1778,
+      "grad_norm": 1.2969554118521678,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 17780
+    },
+    {
+      "epoch": 0.17781,
+      "grad_norm": 1.395480473701788,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 17781
+    },
+    {
+      "epoch": 0.17782,
+      "grad_norm": 1.1210870129212929,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 17782
+    },
+    {
+      "epoch": 0.17783,
+      "grad_norm": 1.3925910127657615,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 17783
+    },
+    {
+      "epoch": 0.17784,
+      "grad_norm": 1.1606521282473876,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 17784
+    },
+    {
+      "epoch": 0.17785,
+      "grad_norm": 1.3082696149032056,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 17785
+    },
+    {
+      "epoch": 0.17786,
+      "grad_norm": 1.055551621576099,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 17786
+    },
+    {
+      "epoch": 0.17787,
+      "grad_norm": 1.437729945871911,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 17787
+    },
+    {
+      "epoch": 0.17788,
+      "grad_norm": 1.0769162266838206,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 17788
+    },
+    {
+      "epoch": 0.17789,
+      "grad_norm": 1.4482118142629796,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 17789
+    },
+    {
+      "epoch": 0.1779,
+      "grad_norm": 1.3582494783626673,
+      "learning_rate": 0.003,
+      "loss": 3.988,
+      "step": 17790
+    },
+    {
+      "epoch": 0.17791,
+      "grad_norm": 1.2590550485047474,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 17791
+    },
+    {
+      "epoch": 0.17792,
+      "grad_norm": 1.5233881061352947,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 17792
+    },
+    {
+      "epoch": 0.17793,
+      "grad_norm": 1.2476714509933917,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 17793
+    },
+    {
+      "epoch": 0.17794,
+      "grad_norm": 1.1457363146513437,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 17794
+    },
+    {
+      "epoch": 0.17795,
+      "grad_norm": 1.285982846103947,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 17795
+    },
+    {
+      "epoch": 0.17796,
+      "grad_norm": 0.9674368897959768,
+      "learning_rate": 0.003,
+      "loss": 3.9859,
+      "step": 17796
+    },
+    {
+      "epoch": 0.17797,
+      "grad_norm": 1.3847835487943505,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 17797
+    },
+    {
+      "epoch": 0.17798,
+      "grad_norm": 1.388823923849772,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 17798
+    },
+    {
+      "epoch": 0.17799,
+      "grad_norm": 1.1850443723939679,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 17799
+    },
+    {
+      "epoch": 0.178,
+      "grad_norm": 1.239507130863008,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 17800
+    },
+    {
+      "epoch": 0.17801,
+      "grad_norm": 1.1649600820146366,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 17801
+    },
+    {
+      "epoch": 0.17802,
+      "grad_norm": 1.1564667995275404,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 17802
+    },
+    {
+      "epoch": 0.17803,
+      "grad_norm": 1.2422393675117376,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 17803
+    },
+    {
+      "epoch": 0.17804,
+      "grad_norm": 1.270593768355618,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 17804
+    },
+    {
+      "epoch": 0.17805,
+      "grad_norm": 1.3871869687322365,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 17805
+    },
+    {
+      "epoch": 0.17806,
+      "grad_norm": 1.2303533481624622,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 17806
+    },
+    {
+      "epoch": 0.17807,
+      "grad_norm": 1.3022480992793621,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 17807
+    },
+    {
+      "epoch": 0.17808,
+      "grad_norm": 1.0142472820069428,
+      "learning_rate": 0.003,
+      "loss": 3.9762,
+      "step": 17808
+    },
+    {
+      "epoch": 0.17809,
+      "grad_norm": 1.2972177297703724,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 17809
+    },
+    {
+      "epoch": 0.1781,
+      "grad_norm": 1.0214519579537469,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 17810
+    },
+    {
+      "epoch": 0.17811,
+      "grad_norm": 1.4056262049180404,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 17811
+    },
+    {
+      "epoch": 0.17812,
+      "grad_norm": 0.9613461804577055,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 17812
+    },
+    {
+      "epoch": 0.17813,
+      "grad_norm": 1.3987265467581167,
+      "learning_rate": 0.003,
+      "loss": 4.0531,
+      "step": 17813
+    },
+    {
+      "epoch": 0.17814,
+      "grad_norm": 1.1919738807894389,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 17814
+    },
+    {
+      "epoch": 0.17815,
+      "grad_norm": 1.3973972286087633,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 17815
+    },
+    {
+      "epoch": 0.17816,
+      "grad_norm": 1.0827310417582043,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 17816
+    },
+    {
+      "epoch": 0.17817,
+      "grad_norm": 1.462587451628352,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 17817
+    },
+    {
+      "epoch": 0.17818,
+      "grad_norm": 1.3370237425179567,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 17818
+    },
+    {
+      "epoch": 0.17819,
+      "grad_norm": 1.3710553095240827,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 17819
+    },
+    {
+      "epoch": 0.1782,
+      "grad_norm": 1.3646700731882089,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 17820
+    },
+    {
+      "epoch": 0.17821,
+      "grad_norm": 1.2610373738435834,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 17821
+    },
+    {
+      "epoch": 0.17822,
+      "grad_norm": 1.2254092006565505,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 17822
+    },
+    {
+      "epoch": 0.17823,
+      "grad_norm": 1.4283800539534146,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 17823
+    },
+    {
+      "epoch": 0.17824,
+      "grad_norm": 1.1670433064995493,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 17824
+    },
+    {
+      "epoch": 0.17825,
+      "grad_norm": 1.5167128526422318,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 17825
+    },
+    {
+      "epoch": 0.17826,
+      "grad_norm": 1.2188122274167377,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 17826
+    },
+    {
+      "epoch": 0.17827,
+      "grad_norm": 1.300553278058531,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 17827
+    },
+    {
+      "epoch": 0.17828,
+      "grad_norm": 1.0737184150438381,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 17828
+    },
+    {
+      "epoch": 0.17829,
+      "grad_norm": 1.281079446383483,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 17829
+    },
+    {
+      "epoch": 0.1783,
+      "grad_norm": 1.2856173908121271,
+      "learning_rate": 0.003,
+      "loss": 3.9716,
+      "step": 17830
+    },
+    {
+      "epoch": 0.17831,
+      "grad_norm": 1.1935793636552723,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 17831
+    },
+    {
+      "epoch": 0.17832,
+      "grad_norm": 1.205178492606702,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 17832
+    },
+    {
+      "epoch": 0.17833,
+      "grad_norm": 1.1944403357477134,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 17833
+    },
+    {
+      "epoch": 0.17834,
+      "grad_norm": 1.3908171744977453,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 17834
+    },
+    {
+      "epoch": 0.17835,
+      "grad_norm": 1.1405338280337045,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 17835
+    },
+    {
+      "epoch": 0.17836,
+      "grad_norm": 1.6468845041136069,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 17836
+    },
+    {
+      "epoch": 0.17837,
+      "grad_norm": 1.0256403167460948,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 17837
+    },
+    {
+      "epoch": 0.17838,
+      "grad_norm": 1.7529827411633745,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 17838
+    },
+    {
+      "epoch": 0.17839,
+      "grad_norm": 1.0343519324796442,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 17839
+    },
+    {
+      "epoch": 0.1784,
+      "grad_norm": 1.3088197200383436,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 17840
+    },
+    {
+      "epoch": 0.17841,
+      "grad_norm": 1.391166098418807,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 17841
+    },
+    {
+      "epoch": 0.17842,
+      "grad_norm": 1.0313034724517778,
+      "learning_rate": 0.003,
+      "loss": 3.9847,
+      "step": 17842
+    },
+    {
+      "epoch": 0.17843,
+      "grad_norm": 1.5906309328849493,
+      "learning_rate": 0.003,
+      "loss": 3.9839,
+      "step": 17843
+    },
+    {
+      "epoch": 0.17844,
+      "grad_norm": 1.2887138037584314,
+      "learning_rate": 0.003,
+      "loss": 4.0575,
+      "step": 17844
+    },
+    {
+      "epoch": 0.17845,
+      "grad_norm": 1.3157215102744646,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 17845
+    },
+    {
+      "epoch": 0.17846,
+      "grad_norm": 1.25794672180459,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 17846
+    },
+    {
+      "epoch": 0.17847,
+      "grad_norm": 1.2592578115929949,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 17847
+    },
+    {
+      "epoch": 0.17848,
+      "grad_norm": 1.2275976027820696,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 17848
+    },
+    {
+      "epoch": 0.17849,
+      "grad_norm": 1.414525973723539,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 17849
+    },
+    {
+      "epoch": 0.1785,
+      "grad_norm": 1.3275192594843719,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 17850
+    },
+    {
+      "epoch": 0.17851,
+      "grad_norm": 1.1369183603220658,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 17851
+    },
+    {
+      "epoch": 0.17852,
+      "grad_norm": 1.223063913334217,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 17852
+    },
+    {
+      "epoch": 0.17853,
+      "grad_norm": 1.156306213694106,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 17853
+    },
+    {
+      "epoch": 0.17854,
+      "grad_norm": 1.230513390835216,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 17854
+    },
+    {
+      "epoch": 0.17855,
+      "grad_norm": 1.3467261543697027,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 17855
+    },
+    {
+      "epoch": 0.17856,
+      "grad_norm": 1.4479018385666143,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 17856
+    },
+    {
+      "epoch": 0.17857,
+      "grad_norm": 1.096494157687689,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 17857
+    },
+    {
+      "epoch": 0.17858,
+      "grad_norm": 1.37154842305199,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 17858
+    },
+    {
+      "epoch": 0.17859,
+      "grad_norm": 1.1382470176153237,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 17859
+    },
+    {
+      "epoch": 0.1786,
+      "grad_norm": 1.4260573661969602,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 17860
+    },
+    {
+      "epoch": 0.17861,
+      "grad_norm": 1.2749053697764088,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 17861
+    },
+    {
+      "epoch": 0.17862,
+      "grad_norm": 1.2813180513464035,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 17862
+    },
+    {
+      "epoch": 0.17863,
+      "grad_norm": 1.1378759447463387,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 17863
+    },
+    {
+      "epoch": 0.17864,
+      "grad_norm": 1.1581966288327354,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 17864
+    },
+    {
+      "epoch": 0.17865,
+      "grad_norm": 1.2251666216073551,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 17865
+    },
+    {
+      "epoch": 0.17866,
+      "grad_norm": 1.5888960876025273,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 17866
+    },
+    {
+      "epoch": 0.17867,
+      "grad_norm": 1.2610834406615368,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 17867
+    },
+    {
+      "epoch": 0.17868,
+      "grad_norm": 1.2981799021894602,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 17868
+    },
+    {
+      "epoch": 0.17869,
+      "grad_norm": 1.22439715024633,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 17869
+    },
+    {
+      "epoch": 0.1787,
+      "grad_norm": 1.285613960652768,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 17870
+    },
+    {
+      "epoch": 0.17871,
+      "grad_norm": 1.39587116721041,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 17871
+    },
+    {
+      "epoch": 0.17872,
+      "grad_norm": 1.0331956935241802,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 17872
+    },
+    {
+      "epoch": 0.17873,
+      "grad_norm": 1.5222012103196498,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 17873
+    },
+    {
+      "epoch": 0.17874,
+      "grad_norm": 1.109330647963323,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 17874
+    },
+    {
+      "epoch": 0.17875,
+      "grad_norm": 1.3513786080210313,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 17875
+    },
+    {
+      "epoch": 0.17876,
+      "grad_norm": 1.1377725636560931,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 17876
+    },
+    {
+      "epoch": 0.17877,
+      "grad_norm": 1.1081239090394368,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 17877
+    },
+    {
+      "epoch": 0.17878,
+      "grad_norm": 1.3504881207612687,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 17878
+    },
+    {
+      "epoch": 0.17879,
+      "grad_norm": 1.2202818156011814,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 17879
+    },
+    {
+      "epoch": 0.1788,
+      "grad_norm": 1.4659793559370045,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 17880
+    },
+    {
+      "epoch": 0.17881,
+      "grad_norm": 0.9991861962378518,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 17881
+    },
+    {
+      "epoch": 0.17882,
+      "grad_norm": 1.2840130566269725,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 17882
+    },
+    {
+      "epoch": 0.17883,
+      "grad_norm": 1.223462630513146,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 17883
+    },
+    {
+      "epoch": 0.17884,
+      "grad_norm": 1.1745549627144207,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 17884
+    },
+    {
+      "epoch": 0.17885,
+      "grad_norm": 1.3288648291234721,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 17885
+    },
+    {
+      "epoch": 0.17886,
+      "grad_norm": 1.2275248459800174,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 17886
+    },
+    {
+      "epoch": 0.17887,
+      "grad_norm": 1.2840299146550205,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 17887
+    },
+    {
+      "epoch": 0.17888,
+      "grad_norm": 1.1109153791822728,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 17888
+    },
+    {
+      "epoch": 0.17889,
+      "grad_norm": 1.4154726301866394,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 17889
+    },
+    {
+      "epoch": 0.1789,
+      "grad_norm": 1.3765358041628772,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 17890
+    },
+    {
+      "epoch": 0.17891,
+      "grad_norm": 1.356781113514337,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 17891
+    },
+    {
+      "epoch": 0.17892,
+      "grad_norm": 1.2083142687013617,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 17892
+    },
+    {
+      "epoch": 0.17893,
+      "grad_norm": 1.3886682337620795,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 17893
+    },
+    {
+      "epoch": 0.17894,
+      "grad_norm": 1.087255529710916,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 17894
+    },
+    {
+      "epoch": 0.17895,
+      "grad_norm": 1.2379106089715426,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 17895
+    },
+    {
+      "epoch": 0.17896,
+      "grad_norm": 1.329775546400975,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 17896
+    },
+    {
+      "epoch": 0.17897,
+      "grad_norm": 1.2250169685520114,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 17897
+    },
+    {
+      "epoch": 0.17898,
+      "grad_norm": 1.2887533824006294,
+      "learning_rate": 0.003,
+      "loss": 3.9863,
+      "step": 17898
+    },
+    {
+      "epoch": 0.17899,
+      "grad_norm": 1.1142232587310008,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 17899
+    },
+    {
+      "epoch": 0.179,
+      "grad_norm": 1.5125094557705192,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 17900
+    },
+    {
+      "epoch": 0.17901,
+      "grad_norm": 0.8959437293219716,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 17901
+    },
+    {
+      "epoch": 0.17902,
+      "grad_norm": 1.481590620348304,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 17902
+    },
+    {
+      "epoch": 0.17903,
+      "grad_norm": 1.4513535134662827,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 17903
+    },
+    {
+      "epoch": 0.17904,
+      "grad_norm": 1.2684392163155676,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 17904
+    },
+    {
+      "epoch": 0.17905,
+      "grad_norm": 1.3435859991089862,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 17905
+    },
+    {
+      "epoch": 0.17906,
+      "grad_norm": 1.14005741793786,
+      "learning_rate": 0.003,
+      "loss": 3.9893,
+      "step": 17906
+    },
+    {
+      "epoch": 0.17907,
+      "grad_norm": 1.2647231228841296,
+      "learning_rate": 0.003,
+      "loss": 3.9744,
+      "step": 17907
+    },
+    {
+      "epoch": 0.17908,
+      "grad_norm": 1.0303130185661538,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 17908
+    },
+    {
+      "epoch": 0.17909,
+      "grad_norm": 1.3362370493898243,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 17909
+    },
+    {
+      "epoch": 0.1791,
+      "grad_norm": 1.3376456562964527,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 17910
+    },
+    {
+      "epoch": 0.17911,
+      "grad_norm": 1.2894298745129902,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 17911
+    },
+    {
+      "epoch": 0.17912,
+      "grad_norm": 1.1938509311080707,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 17912
+    },
+    {
+      "epoch": 0.17913,
+      "grad_norm": 1.2953870550115025,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 17913
+    },
+    {
+      "epoch": 0.17914,
+      "grad_norm": 1.3673196371585163,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 17914
+    },
+    {
+      "epoch": 0.17915,
+      "grad_norm": 1.2180323020321726,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 17915
+    },
+    {
+      "epoch": 0.17916,
+      "grad_norm": 1.335540244149917,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 17916
+    },
+    {
+      "epoch": 0.17917,
+      "grad_norm": 1.1782735032392786,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 17917
+    },
+    {
+      "epoch": 0.17918,
+      "grad_norm": 1.361398393966456,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 17918
+    },
+    {
+      "epoch": 0.17919,
+      "grad_norm": 1.176221574671349,
+      "learning_rate": 0.003,
+      "loss": 3.9864,
+      "step": 17919
+    },
+    {
+      "epoch": 0.1792,
+      "grad_norm": 1.397010968398075,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 17920
+    },
+    {
+      "epoch": 0.17921,
+      "grad_norm": 1.156606009121643,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 17921
+    },
+    {
+      "epoch": 0.17922,
+      "grad_norm": 1.32125183249408,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 17922
+    },
+    {
+      "epoch": 0.17923,
+      "grad_norm": 1.1974488910447372,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 17923
+    },
+    {
+      "epoch": 0.17924,
+      "grad_norm": 1.4585490204785243,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 17924
+    },
+    {
+      "epoch": 0.17925,
+      "grad_norm": 1.1094529111745237,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 17925
+    },
+    {
+      "epoch": 0.17926,
+      "grad_norm": 1.309982676010241,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 17926
+    },
+    {
+      "epoch": 0.17927,
+      "grad_norm": 1.1844851359692525,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 17927
+    },
+    {
+      "epoch": 0.17928,
+      "grad_norm": 1.4815185736791534,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 17928
+    },
+    {
+      "epoch": 0.17929,
+      "grad_norm": 1.1605835647717957,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 17929
+    },
+    {
+      "epoch": 0.1793,
+      "grad_norm": 1.320507640873421,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 17930
+    },
+    {
+      "epoch": 0.17931,
+      "grad_norm": 1.274046317213217,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 17931
+    },
+    {
+      "epoch": 0.17932,
+      "grad_norm": 1.2972089424586997,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 17932
+    },
+    {
+      "epoch": 0.17933,
+      "grad_norm": 1.2441857571769512,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 17933
+    },
+    {
+      "epoch": 0.17934,
+      "grad_norm": 1.2009046967225345,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 17934
+    },
+    {
+      "epoch": 0.17935,
+      "grad_norm": 1.068183752609735,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 17935
+    },
+    {
+      "epoch": 0.17936,
+      "grad_norm": 1.3524304151950766,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 17936
+    },
+    {
+      "epoch": 0.17937,
+      "grad_norm": 1.2006139512947491,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 17937
+    },
+    {
+      "epoch": 0.17938,
+      "grad_norm": 1.5279550747071753,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 17938
+    },
+    {
+      "epoch": 0.17939,
+      "grad_norm": 1.0707218523923578,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 17939
+    },
+    {
+      "epoch": 0.1794,
+      "grad_norm": 1.470373557818703,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 17940
+    },
+    {
+      "epoch": 0.17941,
+      "grad_norm": 1.283217845435159,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 17941
+    },
+    {
+      "epoch": 0.17942,
+      "grad_norm": 1.2524743350909433,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 17942
+    },
+    {
+      "epoch": 0.17943,
+      "grad_norm": 1.3631818977429888,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 17943
+    },
+    {
+      "epoch": 0.17944,
+      "grad_norm": 1.198417068713717,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 17944
+    },
+    {
+      "epoch": 0.17945,
+      "grad_norm": 1.3364558732265843,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 17945
+    },
+    {
+      "epoch": 0.17946,
+      "grad_norm": 1.1993189564378368,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 17946
+    },
+    {
+      "epoch": 0.17947,
+      "grad_norm": 1.2724891851218023,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 17947
+    },
+    {
+      "epoch": 0.17948,
+      "grad_norm": 1.329903192112161,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 17948
+    },
+    {
+      "epoch": 0.17949,
+      "grad_norm": 1.2493252737844187,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 17949
+    },
+    {
+      "epoch": 0.1795,
+      "grad_norm": 1.4258413169140705,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 17950
+    },
+    {
+      "epoch": 0.17951,
+      "grad_norm": 1.0947045287903696,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 17951
+    },
+    {
+      "epoch": 0.17952,
+      "grad_norm": 1.2386797285364355,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 17952
+    },
+    {
+      "epoch": 0.17953,
+      "grad_norm": 1.14594781761,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 17953
+    },
+    {
+      "epoch": 0.17954,
+      "grad_norm": 1.297250203912752,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 17954
+    },
+    {
+      "epoch": 0.17955,
+      "grad_norm": 1.4061598745408204,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 17955
+    },
+    {
+      "epoch": 0.17956,
+      "grad_norm": 1.1411124734017042,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 17956
+    },
+    {
+      "epoch": 0.17957,
+      "grad_norm": 1.4739424177374514,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 17957
+    },
+    {
+      "epoch": 0.17958,
+      "grad_norm": 1.1333136046265506,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 17958
+    },
+    {
+      "epoch": 0.17959,
+      "grad_norm": 1.3103038254148236,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 17959
+    },
+    {
+      "epoch": 0.1796,
+      "grad_norm": 1.119448017728659,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 17960
+    },
+    {
+      "epoch": 0.17961,
+      "grad_norm": 1.2547630506354666,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 17961
+    },
+    {
+      "epoch": 0.17962,
+      "grad_norm": 1.2326847898269968,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 17962
+    },
+    {
+      "epoch": 0.17963,
+      "grad_norm": 1.3243312318037208,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 17963
+    },
+    {
+      "epoch": 0.17964,
+      "grad_norm": 1.2676590854248495,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 17964
+    },
+    {
+      "epoch": 0.17965,
+      "grad_norm": 1.144462546866322,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 17965
+    },
+    {
+      "epoch": 0.17966,
+      "grad_norm": 1.316130726919737,
+      "learning_rate": 0.003,
+      "loss": 3.9773,
+      "step": 17966
+    },
+    {
+      "epoch": 0.17967,
+      "grad_norm": 1.2412240478500736,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 17967
+    },
+    {
+      "epoch": 0.17968,
+      "grad_norm": 1.2352631669776275,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 17968
+    },
+    {
+      "epoch": 0.17969,
+      "grad_norm": 1.1924883625361797,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 17969
+    },
+    {
+      "epoch": 0.1797,
+      "grad_norm": 1.1305434477854719,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 17970
+    },
+    {
+      "epoch": 0.17971,
+      "grad_norm": 1.511192269977118,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 17971
+    },
+    {
+      "epoch": 0.17972,
+      "grad_norm": 1.2017320818124952,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 17972
+    },
+    {
+      "epoch": 0.17973,
+      "grad_norm": 1.4131206724996497,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 17973
+    },
+    {
+      "epoch": 0.17974,
+      "grad_norm": 1.1541400742377594,
+      "learning_rate": 0.003,
+      "loss": 3.9713,
+      "step": 17974
+    },
+    {
+      "epoch": 0.17975,
+      "grad_norm": 1.5212433825381124,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 17975
+    },
+    {
+      "epoch": 0.17976,
+      "grad_norm": 1.0915852560086072,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 17976
+    },
+    {
+      "epoch": 0.17977,
+      "grad_norm": 1.4541941510471958,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 17977
+    },
+    {
+      "epoch": 0.17978,
+      "grad_norm": 1.1637687489354096,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 17978
+    },
+    {
+      "epoch": 0.17979,
+      "grad_norm": 1.6518764770465473,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 17979
+    },
+    {
+      "epoch": 0.1798,
+      "grad_norm": 1.226657411711691,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 17980
+    },
+    {
+      "epoch": 0.17981,
+      "grad_norm": 1.4672932830010585,
+      "learning_rate": 0.003,
+      "loss": 3.9461,
+      "step": 17981
+    },
+    {
+      "epoch": 0.17982,
+      "grad_norm": 1.0789539113969855,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 17982
+    },
+    {
+      "epoch": 0.17983,
+      "grad_norm": 1.3077465429252888,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 17983
+    },
+    {
+      "epoch": 0.17984,
+      "grad_norm": 1.3217963244269073,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 17984
+    },
+    {
+      "epoch": 0.17985,
+      "grad_norm": 1.4177608416518557,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 17985
+    },
+    {
+      "epoch": 0.17986,
+      "grad_norm": 1.2630185518715435,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 17986
+    },
+    {
+      "epoch": 0.17987,
+      "grad_norm": 1.1224916779924532,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 17987
+    },
+    {
+      "epoch": 0.17988,
+      "grad_norm": 1.223872692574195,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 17988
+    },
+    {
+      "epoch": 0.17989,
+      "grad_norm": 1.1846536337950013,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 17989
+    },
+    {
+      "epoch": 0.1799,
+      "grad_norm": 1.3396779385326112,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 17990
+    },
+    {
+      "epoch": 0.17991,
+      "grad_norm": 1.0887540493867556,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 17991
+    },
+    {
+      "epoch": 0.17992,
+      "grad_norm": 1.2346943418605258,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 17992
+    },
+    {
+      "epoch": 0.17993,
+      "grad_norm": 1.4320850253519977,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 17993
+    },
+    {
+      "epoch": 0.17994,
+      "grad_norm": 1.218821459511451,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 17994
+    },
+    {
+      "epoch": 0.17995,
+      "grad_norm": 1.3859837355781857,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 17995
+    },
+    {
+      "epoch": 0.17996,
+      "grad_norm": 1.1344698861221325,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 17996
+    },
+    {
+      "epoch": 0.17997,
+      "grad_norm": 1.3849875752526728,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 17997
+    },
+    {
+      "epoch": 0.17998,
+      "grad_norm": 1.251398295605249,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 17998
+    },
+    {
+      "epoch": 0.17999,
+      "grad_norm": 1.2819503563394226,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 17999
+    },
+    {
+      "epoch": 0.18,
+      "grad_norm": 1.3741654746676373,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 18000
+    },
+    {
+      "epoch": 0.18001,
+      "grad_norm": 1.1933635475479498,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 18001
+    },
+    {
+      "epoch": 0.18002,
+      "grad_norm": 1.3098748972850278,
+      "learning_rate": 0.003,
+      "loss": 3.9595,
+      "step": 18002
+    },
+    {
+      "epoch": 0.18003,
+      "grad_norm": 1.212517051786349,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 18003
+    },
+    {
+      "epoch": 0.18004,
+      "grad_norm": 1.392106308722888,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 18004
+    },
+    {
+      "epoch": 0.18005,
+      "grad_norm": 1.424687540303722,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 18005
+    },
+    {
+      "epoch": 0.18006,
+      "grad_norm": 1.246009503653905,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 18006
+    },
+    {
+      "epoch": 0.18007,
+      "grad_norm": 1.099120441540404,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 18007
+    },
+    {
+      "epoch": 0.18008,
+      "grad_norm": 1.4627704259168404,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 18008
+    },
+    {
+      "epoch": 0.18009,
+      "grad_norm": 1.224397106357738,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 18009
+    },
+    {
+      "epoch": 0.1801,
+      "grad_norm": 1.472982585937009,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 18010
+    },
+    {
+      "epoch": 0.18011,
+      "grad_norm": 1.3218278772814305,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 18011
+    },
+    {
+      "epoch": 0.18012,
+      "grad_norm": 1.2015626900018146,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 18012
+    },
+    {
+      "epoch": 0.18013,
+      "grad_norm": 1.3141744379305316,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 18013
+    },
+    {
+      "epoch": 0.18014,
+      "grad_norm": 1.305760597878548,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 18014
+    },
+    {
+      "epoch": 0.18015,
+      "grad_norm": 1.2904157889554446,
+      "learning_rate": 0.003,
+      "loss": 4.0572,
+      "step": 18015
+    },
+    {
+      "epoch": 0.18016,
+      "grad_norm": 1.0936669246650228,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 18016
+    },
+    {
+      "epoch": 0.18017,
+      "grad_norm": 1.4724170169853559,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 18017
+    },
+    {
+      "epoch": 0.18018,
+      "grad_norm": 1.1737041272683193,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 18018
+    },
+    {
+      "epoch": 0.18019,
+      "grad_norm": 1.29288969785492,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 18019
+    },
+    {
+      "epoch": 0.1802,
+      "grad_norm": 1.0685343810456742,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 18020
+    },
+    {
+      "epoch": 0.18021,
+      "grad_norm": 1.3399528333597222,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 18021
+    },
+    {
+      "epoch": 0.18022,
+      "grad_norm": 1.2301345998275353,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 18022
+    },
+    {
+      "epoch": 0.18023,
+      "grad_norm": 1.4377926684604674,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 18023
+    },
+    {
+      "epoch": 0.18024,
+      "grad_norm": 1.4485503664499548,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 18024
+    },
+    {
+      "epoch": 0.18025,
+      "grad_norm": 1.108201695546443,
+      "learning_rate": 0.003,
+      "loss": 3.9835,
+      "step": 18025
+    },
+    {
+      "epoch": 0.18026,
+      "grad_norm": 1.2249896527161086,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 18026
+    },
+    {
+      "epoch": 0.18027,
+      "grad_norm": 1.3631836057201774,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 18027
+    },
+    {
+      "epoch": 0.18028,
+      "grad_norm": 1.2128407493827715,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 18028
+    },
+    {
+      "epoch": 0.18029,
+      "grad_norm": 1.4377792927247532,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 18029
+    },
+    {
+      "epoch": 0.1803,
+      "grad_norm": 1.1196044734687098,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 18030
+    },
+    {
+      "epoch": 0.18031,
+      "grad_norm": 1.3337921773230739,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 18031
+    },
+    {
+      "epoch": 0.18032,
+      "grad_norm": 1.2361892568593928,
+      "learning_rate": 0.003,
+      "loss": 4.0715,
+      "step": 18032
+    },
+    {
+      "epoch": 0.18033,
+      "grad_norm": 1.2633246942423886,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 18033
+    },
+    {
+      "epoch": 0.18034,
+      "grad_norm": 1.284347096442261,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 18034
+    },
+    {
+      "epoch": 0.18035,
+      "grad_norm": 1.2429609759207414,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 18035
+    },
+    {
+      "epoch": 0.18036,
+      "grad_norm": 1.3922648571191765,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 18036
+    },
+    {
+      "epoch": 0.18037,
+      "grad_norm": 1.1520110539034873,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 18037
+    },
+    {
+      "epoch": 0.18038,
+      "grad_norm": 1.4640431772660716,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 18038
+    },
+    {
+      "epoch": 0.18039,
+      "grad_norm": 1.194106552299273,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 18039
+    },
+    {
+      "epoch": 0.1804,
+      "grad_norm": 1.4627313146103296,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 18040
+    },
+    {
+      "epoch": 0.18041,
+      "grad_norm": 1.1027984597618095,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 18041
+    },
+    {
+      "epoch": 0.18042,
+      "grad_norm": 1.3924257301363576,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 18042
+    },
+    {
+      "epoch": 0.18043,
+      "grad_norm": 1.1401766419096124,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 18043
+    },
+    {
+      "epoch": 0.18044,
+      "grad_norm": 1.2731268679660979,
+      "learning_rate": 0.003,
+      "loss": 4.0506,
+      "step": 18044
+    },
+    {
+      "epoch": 0.18045,
+      "grad_norm": 1.2319214886258891,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 18045
+    },
+    {
+      "epoch": 0.18046,
+      "grad_norm": 1.1969308560875676,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 18046
+    },
+    {
+      "epoch": 0.18047,
+      "grad_norm": 1.1623865443335577,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 18047
+    },
+    {
+      "epoch": 0.18048,
+      "grad_norm": 1.347600280707264,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 18048
+    },
+    {
+      "epoch": 0.18049,
+      "grad_norm": 1.3095982082710382,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 18049
+    },
+    {
+      "epoch": 0.1805,
+      "grad_norm": 1.4009570741734567,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 18050
+    },
+    {
+      "epoch": 0.18051,
+      "grad_norm": 1.350897678706278,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 18051
+    },
+    {
+      "epoch": 0.18052,
+      "grad_norm": 1.1555449767293169,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 18052
+    },
+    {
+      "epoch": 0.18053,
+      "grad_norm": 1.201864879613062,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 18053
+    },
+    {
+      "epoch": 0.18054,
+      "grad_norm": 1.3632552345487847,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 18054
+    },
+    {
+      "epoch": 0.18055,
+      "grad_norm": 1.3748051053858183,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 18055
+    },
+    {
+      "epoch": 0.18056,
+      "grad_norm": 1.368423119049279,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 18056
+    },
+    {
+      "epoch": 0.18057,
+      "grad_norm": 1.4737393844383795,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 18057
+    },
+    {
+      "epoch": 0.18058,
+      "grad_norm": 1.180383393312585,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 18058
+    },
+    {
+      "epoch": 0.18059,
+      "grad_norm": 1.220989213819937,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 18059
+    },
+    {
+      "epoch": 0.1806,
+      "grad_norm": 1.3790859140402394,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 18060
+    },
+    {
+      "epoch": 0.18061,
+      "grad_norm": 1.3230364801202141,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 18061
+    },
+    {
+      "epoch": 0.18062,
+      "grad_norm": 1.2205140368731702,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 18062
+    },
+    {
+      "epoch": 0.18063,
+      "grad_norm": 1.1794690741742826,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 18063
+    },
+    {
+      "epoch": 0.18064,
+      "grad_norm": 1.3874867718840194,
+      "learning_rate": 0.003,
+      "loss": 3.9837,
+      "step": 18064
+    },
+    {
+      "epoch": 0.18065,
+      "grad_norm": 1.2158102941757782,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 18065
+    },
+    {
+      "epoch": 0.18066,
+      "grad_norm": 1.3971232563203826,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 18066
+    },
+    {
+      "epoch": 0.18067,
+      "grad_norm": 1.1537258202681013,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 18067
+    },
+    {
+      "epoch": 0.18068,
+      "grad_norm": 1.317011717519727,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 18068
+    },
+    {
+      "epoch": 0.18069,
+      "grad_norm": 1.4116033534763732,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 18069
+    },
+    {
+      "epoch": 0.1807,
+      "grad_norm": 1.2352646362071453,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 18070
+    },
+    {
+      "epoch": 0.18071,
+      "grad_norm": 1.0614122175823477,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 18071
+    },
+    {
+      "epoch": 0.18072,
+      "grad_norm": 1.4037585678071596,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 18072
+    },
+    {
+      "epoch": 0.18073,
+      "grad_norm": 1.2619816077975499,
+      "learning_rate": 0.003,
+      "loss": 4.0497,
+      "step": 18073
+    },
+    {
+      "epoch": 0.18074,
+      "grad_norm": 1.279158743537198,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 18074
+    },
+    {
+      "epoch": 0.18075,
+      "grad_norm": 1.0144218595931895,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 18075
+    },
+    {
+      "epoch": 0.18076,
+      "grad_norm": 1.3932923689905843,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 18076
+    },
+    {
+      "epoch": 0.18077,
+      "grad_norm": 1.2381797948281783,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 18077
+    },
+    {
+      "epoch": 0.18078,
+      "grad_norm": 1.2064732572389434,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 18078
+    },
+    {
+      "epoch": 0.18079,
+      "grad_norm": 1.2309520334545374,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 18079
+    },
+    {
+      "epoch": 0.1808,
+      "grad_norm": 1.4472036453649932,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 18080
+    },
+    {
+      "epoch": 0.18081,
+      "grad_norm": 1.0872125792142273,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 18081
+    },
+    {
+      "epoch": 0.18082,
+      "grad_norm": 1.7513922460165103,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 18082
+    },
+    {
+      "epoch": 0.18083,
+      "grad_norm": 0.901507635320083,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 18083
+    },
+    {
+      "epoch": 0.18084,
+      "grad_norm": 1.1722118986780032,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 18084
+    },
+    {
+      "epoch": 0.18085,
+      "grad_norm": 1.3065034468877128,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 18085
+    },
+    {
+      "epoch": 0.18086,
+      "grad_norm": 1.3000990175300853,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 18086
+    },
+    {
+      "epoch": 0.18087,
+      "grad_norm": 1.176289481273397,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 18087
+    },
+    {
+      "epoch": 0.18088,
+      "grad_norm": 1.474747785249665,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 18088
+    },
+    {
+      "epoch": 0.18089,
+      "grad_norm": 1.15869905445471,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 18089
+    },
+    {
+      "epoch": 0.1809,
+      "grad_norm": 1.531752376162567,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 18090
+    },
+    {
+      "epoch": 0.18091,
+      "grad_norm": 1.3081067249732787,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 18091
+    },
+    {
+      "epoch": 0.18092,
+      "grad_norm": 1.1556575754200102,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 18092
+    },
+    {
+      "epoch": 0.18093,
+      "grad_norm": 1.311386628742779,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 18093
+    },
+    {
+      "epoch": 0.18094,
+      "grad_norm": 1.2743429778379944,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 18094
+    },
+    {
+      "epoch": 0.18095,
+      "grad_norm": 1.1268201413256642,
+      "learning_rate": 0.003,
+      "loss": 3.9803,
+      "step": 18095
+    },
+    {
+      "epoch": 0.18096,
+      "grad_norm": 1.533802254578135,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 18096
+    },
+    {
+      "epoch": 0.18097,
+      "grad_norm": 1.143733177519895,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 18097
+    },
+    {
+      "epoch": 0.18098,
+      "grad_norm": 1.6237064413171671,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 18098
+    },
+    {
+      "epoch": 0.18099,
+      "grad_norm": 1.2066381421671812,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 18099
+    },
+    {
+      "epoch": 0.181,
+      "grad_norm": 1.3431374559869123,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 18100
+    },
+    {
+      "epoch": 0.18101,
+      "grad_norm": 1.202812996875956,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 18101
+    },
+    {
+      "epoch": 0.18102,
+      "grad_norm": 1.1698554573987243,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 18102
+    },
+    {
+      "epoch": 0.18103,
+      "grad_norm": 1.2428386400393254,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 18103
+    },
+    {
+      "epoch": 0.18104,
+      "grad_norm": 1.1208181772818042,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 18104
+    },
+    {
+      "epoch": 0.18105,
+      "grad_norm": 1.4972398021134263,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 18105
+    },
+    {
+      "epoch": 0.18106,
+      "grad_norm": 1.115878346566044,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 18106
+    },
+    {
+      "epoch": 0.18107,
+      "grad_norm": 1.4277133733715102,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 18107
+    },
+    {
+      "epoch": 0.18108,
+      "grad_norm": 1.4102164927486251,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 18108
+    },
+    {
+      "epoch": 0.18109,
+      "grad_norm": 1.1152660397268048,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 18109
+    },
+    {
+      "epoch": 0.1811,
+      "grad_norm": 1.2872868106598139,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 18110
+    },
+    {
+      "epoch": 0.18111,
+      "grad_norm": 1.394418514138197,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 18111
+    },
+    {
+      "epoch": 0.18112,
+      "grad_norm": 1.1602986408748321,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 18112
+    },
+    {
+      "epoch": 0.18113,
+      "grad_norm": 1.405413427856654,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 18113
+    },
+    {
+      "epoch": 0.18114,
+      "grad_norm": 1.2248450601215533,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 18114
+    },
+    {
+      "epoch": 0.18115,
+      "grad_norm": 1.3294455281217434,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 18115
+    },
+    {
+      "epoch": 0.18116,
+      "grad_norm": 1.4136512393647949,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 18116
+    },
+    {
+      "epoch": 0.18117,
+      "grad_norm": 1.115576619747742,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 18117
+    },
+    {
+      "epoch": 0.18118,
+      "grad_norm": 1.4550200396045296,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 18118
+    },
+    {
+      "epoch": 0.18119,
+      "grad_norm": 1.178333624432399,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 18119
+    },
+    {
+      "epoch": 0.1812,
+      "grad_norm": 1.53289204822824,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 18120
+    },
+    {
+      "epoch": 0.18121,
+      "grad_norm": 1.165712719335234,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 18121
+    },
+    {
+      "epoch": 0.18122,
+      "grad_norm": 1.1536296849685932,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 18122
+    },
+    {
+      "epoch": 0.18123,
+      "grad_norm": 1.3061571896655713,
+      "learning_rate": 0.003,
+      "loss": 3.9758,
+      "step": 18123
+    },
+    {
+      "epoch": 0.18124,
+      "grad_norm": 1.2293773622678938,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 18124
+    },
+    {
+      "epoch": 0.18125,
+      "grad_norm": 1.2751405136705993,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 18125
+    },
+    {
+      "epoch": 0.18126,
+      "grad_norm": 1.108491319706021,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 18126
+    },
+    {
+      "epoch": 0.18127,
+      "grad_norm": 1.3837138363424657,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 18127
+    },
+    {
+      "epoch": 0.18128,
+      "grad_norm": 0.9960471378550317,
+      "learning_rate": 0.003,
+      "loss": 3.9845,
+      "step": 18128
+    },
+    {
+      "epoch": 0.18129,
+      "grad_norm": 1.5916971542444212,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 18129
+    },
+    {
+      "epoch": 0.1813,
+      "grad_norm": 1.058993336455924,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 18130
+    },
+    {
+      "epoch": 0.18131,
+      "grad_norm": 1.5209413438626223,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 18131
+    },
+    {
+      "epoch": 0.18132,
+      "grad_norm": 1.1303170062062597,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 18132
+    },
+    {
+      "epoch": 0.18133,
+      "grad_norm": 1.5315039018604342,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 18133
+    },
+    {
+      "epoch": 0.18134,
+      "grad_norm": 1.1804779996899288,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 18134
+    },
+    {
+      "epoch": 0.18135,
+      "grad_norm": 1.3623009834521842,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 18135
+    },
+    {
+      "epoch": 0.18136,
+      "grad_norm": 1.184765040963404,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 18136
+    },
+    {
+      "epoch": 0.18137,
+      "grad_norm": 1.248538813627692,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 18137
+    },
+    {
+      "epoch": 0.18138,
+      "grad_norm": 1.1882430006443279,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 18138
+    },
+    {
+      "epoch": 0.18139,
+      "grad_norm": 1.2704498041616852,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 18139
+    },
+    {
+      "epoch": 0.1814,
+      "grad_norm": 1.3659019087493167,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 18140
+    },
+    {
+      "epoch": 0.18141,
+      "grad_norm": 1.2346528099879523,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 18141
+    },
+    {
+      "epoch": 0.18142,
+      "grad_norm": 1.3299592955694377,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 18142
+    },
+    {
+      "epoch": 0.18143,
+      "grad_norm": 1.2893952830730906,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 18143
+    },
+    {
+      "epoch": 0.18144,
+      "grad_norm": 1.2444686259253939,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 18144
+    },
+    {
+      "epoch": 0.18145,
+      "grad_norm": 1.1147178423921968,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 18145
+    },
+    {
+      "epoch": 0.18146,
+      "grad_norm": 1.3555438914152864,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 18146
+    },
+    {
+      "epoch": 0.18147,
+      "grad_norm": 1.1190174607528691,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 18147
+    },
+    {
+      "epoch": 0.18148,
+      "grad_norm": 1.4753459405725098,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 18148
+    },
+    {
+      "epoch": 0.18149,
+      "grad_norm": 1.2181917350244813,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 18149
+    },
+    {
+      "epoch": 0.1815,
+      "grad_norm": 1.3655624174024366,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 18150
+    },
+    {
+      "epoch": 0.18151,
+      "grad_norm": 1.2763174805966657,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 18151
+    },
+    {
+      "epoch": 0.18152,
+      "grad_norm": 1.444889682698498,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 18152
+    },
+    {
+      "epoch": 0.18153,
+      "grad_norm": 1.2085691193360635,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 18153
+    },
+    {
+      "epoch": 0.18154,
+      "grad_norm": 1.0908245865605166,
+      "learning_rate": 0.003,
+      "loss": 3.9844,
+      "step": 18154
+    },
+    {
+      "epoch": 0.18155,
+      "grad_norm": 1.5099350769802962,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 18155
+    },
+    {
+      "epoch": 0.18156,
+      "grad_norm": 1.1255545526946735,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 18156
+    },
+    {
+      "epoch": 0.18157,
+      "grad_norm": 1.3686218230504699,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 18157
+    },
+    {
+      "epoch": 0.18158,
+      "grad_norm": 1.014531954026706,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 18158
+    },
+    {
+      "epoch": 0.18159,
+      "grad_norm": 1.5630287215197785,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 18159
+    },
+    {
+      "epoch": 0.1816,
+      "grad_norm": 1.1951031627857596,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 18160
+    },
+    {
+      "epoch": 0.18161,
+      "grad_norm": 1.4440238058794297,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 18161
+    },
+    {
+      "epoch": 0.18162,
+      "grad_norm": 1.2055848111490026,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 18162
+    },
+    {
+      "epoch": 0.18163,
+      "grad_norm": 1.3472878879325914,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 18163
+    },
+    {
+      "epoch": 0.18164,
+      "grad_norm": 1.261742286541519,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 18164
+    },
+    {
+      "epoch": 0.18165,
+      "grad_norm": 1.3588301712393138,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 18165
+    },
+    {
+      "epoch": 0.18166,
+      "grad_norm": 1.1585767016001705,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 18166
+    },
+    {
+      "epoch": 0.18167,
+      "grad_norm": 1.3441348595731184,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 18167
+    },
+    {
+      "epoch": 0.18168,
+      "grad_norm": 1.173183522395183,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 18168
+    },
+    {
+      "epoch": 0.18169,
+      "grad_norm": 1.533036177909173,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 18169
+    },
+    {
+      "epoch": 0.1817,
+      "grad_norm": 1.2600044816955933,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 18170
+    },
+    {
+      "epoch": 0.18171,
+      "grad_norm": 1.3100149997239376,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 18171
+    },
+    {
+      "epoch": 0.18172,
+      "grad_norm": 1.2136390858732689,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 18172
+    },
+    {
+      "epoch": 0.18173,
+      "grad_norm": 1.2607335650878135,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 18173
+    },
+    {
+      "epoch": 0.18174,
+      "grad_norm": 1.036453337144555,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 18174
+    },
+    {
+      "epoch": 0.18175,
+      "grad_norm": 1.4523655146583063,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 18175
+    },
+    {
+      "epoch": 0.18176,
+      "grad_norm": 1.048780729980621,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 18176
+    },
+    {
+      "epoch": 0.18177,
+      "grad_norm": 1.5229064863778266,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 18177
+    },
+    {
+      "epoch": 0.18178,
+      "grad_norm": 1.1509516821016428,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 18178
+    },
+    {
+      "epoch": 0.18179,
+      "grad_norm": 1.759995085427657,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 18179
+    },
+    {
+      "epoch": 0.1818,
+      "grad_norm": 1.0552996974550792,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 18180
+    },
+    {
+      "epoch": 0.18181,
+      "grad_norm": 1.30115622957003,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 18181
+    },
+    {
+      "epoch": 0.18182,
+      "grad_norm": 1.3024146378378039,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 18182
+    },
+    {
+      "epoch": 0.18183,
+      "grad_norm": 1.2110209860613366,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 18183
+    },
+    {
+      "epoch": 0.18184,
+      "grad_norm": 1.3931575048972484,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 18184
+    },
+    {
+      "epoch": 0.18185,
+      "grad_norm": 1.3834328466363612,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 18185
+    },
+    {
+      "epoch": 0.18186,
+      "grad_norm": 1.2536345227137324,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 18186
+    },
+    {
+      "epoch": 0.18187,
+      "grad_norm": 1.1041603580401191,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 18187
+    },
+    {
+      "epoch": 0.18188,
+      "grad_norm": 1.2536065909520349,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 18188
+    },
+    {
+      "epoch": 0.18189,
+      "grad_norm": 1.117385507479427,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 18189
+    },
+    {
+      "epoch": 0.1819,
+      "grad_norm": 1.2983498459511376,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 18190
+    },
+    {
+      "epoch": 0.18191,
+      "grad_norm": 1.2184459973642885,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 18191
+    },
+    {
+      "epoch": 0.18192,
+      "grad_norm": 1.1509796342701153,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 18192
+    },
+    {
+      "epoch": 0.18193,
+      "grad_norm": 1.2928667659153972,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 18193
+    },
+    {
+      "epoch": 0.18194,
+      "grad_norm": 1.3229778827050338,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 18194
+    },
+    {
+      "epoch": 0.18195,
+      "grad_norm": 1.2935896839958851,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 18195
+    },
+    {
+      "epoch": 0.18196,
+      "grad_norm": 1.188892636841423,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 18196
+    },
+    {
+      "epoch": 0.18197,
+      "grad_norm": 1.2851214383382479,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 18197
+    },
+    {
+      "epoch": 0.18198,
+      "grad_norm": 1.3852784994657443,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 18198
+    },
+    {
+      "epoch": 0.18199,
+      "grad_norm": 1.4143040656970995,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 18199
+    },
+    {
+      "epoch": 0.182,
+      "grad_norm": 1.350309120327819,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 18200
+    },
+    {
+      "epoch": 0.18201,
+      "grad_norm": 1.2407464281534468,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 18201
+    },
+    {
+      "epoch": 0.18202,
+      "grad_norm": 1.2119305968921505,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 18202
+    },
+    {
+      "epoch": 0.18203,
+      "grad_norm": 1.2101898464577356,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 18203
+    },
+    {
+      "epoch": 0.18204,
+      "grad_norm": 1.4768420872114572,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 18204
+    },
+    {
+      "epoch": 0.18205,
+      "grad_norm": 1.0963125232780624,
+      "learning_rate": 0.003,
+      "loss": 3.976,
+      "step": 18205
+    },
+    {
+      "epoch": 0.18206,
+      "grad_norm": 1.4907463450281417,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 18206
+    },
+    {
+      "epoch": 0.18207,
+      "grad_norm": 1.08251403467361,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 18207
+    },
+    {
+      "epoch": 0.18208,
+      "grad_norm": 1.3712794594338669,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 18208
+    },
+    {
+      "epoch": 0.18209,
+      "grad_norm": 0.9978160782565596,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 18209
+    },
+    {
+      "epoch": 0.1821,
+      "grad_norm": 1.3462577806423313,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 18210
+    },
+    {
+      "epoch": 0.18211,
+      "grad_norm": 1.0638576140205505,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 18211
+    },
+    {
+      "epoch": 0.18212,
+      "grad_norm": 1.487066531389715,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 18212
+    },
+    {
+      "epoch": 0.18213,
+      "grad_norm": 1.2345978757776293,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 18213
+    },
+    {
+      "epoch": 0.18214,
+      "grad_norm": 1.3367698913746935,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 18214
+    },
+    {
+      "epoch": 0.18215,
+      "grad_norm": 1.2982170491306446,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 18215
+    },
+    {
+      "epoch": 0.18216,
+      "grad_norm": 1.4710501586205065,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 18216
+    },
+    {
+      "epoch": 0.18217,
+      "grad_norm": 0.9724905612179099,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 18217
+    },
+    {
+      "epoch": 0.18218,
+      "grad_norm": 1.243080895345441,
+      "learning_rate": 0.003,
+      "loss": 3.9837,
+      "step": 18218
+    },
+    {
+      "epoch": 0.18219,
+      "grad_norm": 1.1583332300816789,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 18219
+    },
+    {
+      "epoch": 0.1822,
+      "grad_norm": 1.4061740171975021,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 18220
+    },
+    {
+      "epoch": 0.18221,
+      "grad_norm": 1.3347184015308404,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 18221
+    },
+    {
+      "epoch": 0.18222,
+      "grad_norm": 1.197649749847447,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 18222
+    },
+    {
+      "epoch": 0.18223,
+      "grad_norm": 1.1559623563922403,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 18223
+    },
+    {
+      "epoch": 0.18224,
+      "grad_norm": 1.3258862057335294,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 18224
+    },
+    {
+      "epoch": 0.18225,
+      "grad_norm": 1.3489691972208808,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 18225
+    },
+    {
+      "epoch": 0.18226,
+      "grad_norm": 1.4596546168913027,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 18226
+    },
+    {
+      "epoch": 0.18227,
+      "grad_norm": 1.1011420292857328,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 18227
+    },
+    {
+      "epoch": 0.18228,
+      "grad_norm": 1.4691879728382518,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 18228
+    },
+    {
+      "epoch": 0.18229,
+      "grad_norm": 1.0600027382272688,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 18229
+    },
+    {
+      "epoch": 0.1823,
+      "grad_norm": 1.91440668806715,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 18230
+    },
+    {
+      "epoch": 0.18231,
+      "grad_norm": 1.087272659097562,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 18231
+    },
+    {
+      "epoch": 0.18232,
+      "grad_norm": 1.3160489393275667,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 18232
+    },
+    {
+      "epoch": 0.18233,
+      "grad_norm": 1.205997450393259,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 18233
+    },
+    {
+      "epoch": 0.18234,
+      "grad_norm": 1.2852537514127869,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 18234
+    },
+    {
+      "epoch": 0.18235,
+      "grad_norm": 1.3131140085067154,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 18235
+    },
+    {
+      "epoch": 0.18236,
+      "grad_norm": 1.3322776741884512,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 18236
+    },
+    {
+      "epoch": 0.18237,
+      "grad_norm": 1.2484259749190518,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 18237
+    },
+    {
+      "epoch": 0.18238,
+      "grad_norm": 1.1687486871042316,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 18238
+    },
+    {
+      "epoch": 0.18239,
+      "grad_norm": 1.069335222221601,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 18239
+    },
+    {
+      "epoch": 0.1824,
+      "grad_norm": 1.555965179687311,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 18240
+    },
+    {
+      "epoch": 0.18241,
+      "grad_norm": 1.172706123428621,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 18241
+    },
+    {
+      "epoch": 0.18242,
+      "grad_norm": 1.6096084141251266,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 18242
+    },
+    {
+      "epoch": 0.18243,
+      "grad_norm": 1.1914307307720635,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 18243
+    },
+    {
+      "epoch": 0.18244,
+      "grad_norm": 1.3138377738427773,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 18244
+    },
+    {
+      "epoch": 0.18245,
+      "grad_norm": 1.3413909707937954,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 18245
+    },
+    {
+      "epoch": 0.18246,
+      "grad_norm": 1.0939883780957658,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 18246
+    },
+    {
+      "epoch": 0.18247,
+      "grad_norm": 1.2312089126378756,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 18247
+    },
+    {
+      "epoch": 0.18248,
+      "grad_norm": 1.3704760427576983,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 18248
+    },
+    {
+      "epoch": 0.18249,
+      "grad_norm": 1.1121469068817809,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 18249
+    },
+    {
+      "epoch": 0.1825,
+      "grad_norm": 1.2455867737055366,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 18250
+    },
+    {
+      "epoch": 0.18251,
+      "grad_norm": 1.5324873960709116,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 18251
+    },
+    {
+      "epoch": 0.18252,
+      "grad_norm": 1.0015715031516774,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 18252
+    },
+    {
+      "epoch": 0.18253,
+      "grad_norm": 1.130718883969915,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 18253
+    },
+    {
+      "epoch": 0.18254,
+      "grad_norm": 1.3137672179215365,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 18254
+    },
+    {
+      "epoch": 0.18255,
+      "grad_norm": 1.3104229198211335,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 18255
+    },
+    {
+      "epoch": 0.18256,
+      "grad_norm": 0.9945094424588728,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 18256
+    },
+    {
+      "epoch": 0.18257,
+      "grad_norm": 1.7183461639952,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 18257
+    },
+    {
+      "epoch": 0.18258,
+      "grad_norm": 1.1880604901138898,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 18258
+    },
+    {
+      "epoch": 0.18259,
+      "grad_norm": 1.1040386368554849,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 18259
+    },
+    {
+      "epoch": 0.1826,
+      "grad_norm": 1.5306036978635968,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 18260
+    },
+    {
+      "epoch": 0.18261,
+      "grad_norm": 1.3150678557370818,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 18261
+    },
+    {
+      "epoch": 0.18262,
+      "grad_norm": 1.409168557615195,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 18262
+    },
+    {
+      "epoch": 0.18263,
+      "grad_norm": 1.3989714258024877,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 18263
+    },
+    {
+      "epoch": 0.18264,
+      "grad_norm": 1.344570123510531,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 18264
+    },
+    {
+      "epoch": 0.18265,
+      "grad_norm": 1.123239125575655,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 18265
+    },
+    {
+      "epoch": 0.18266,
+      "grad_norm": 1.3037603359249763,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 18266
+    },
+    {
+      "epoch": 0.18267,
+      "grad_norm": 1.2861279634682024,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 18267
+    },
+    {
+      "epoch": 0.18268,
+      "grad_norm": 1.3954523620095873,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 18268
+    },
+    {
+      "epoch": 0.18269,
+      "grad_norm": 1.232497391571598,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 18269
+    },
+    {
+      "epoch": 0.1827,
+      "grad_norm": 1.2787868742776072,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 18270
+    },
+    {
+      "epoch": 0.18271,
+      "grad_norm": 1.2320432357390205,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 18271
+    },
+    {
+      "epoch": 0.18272,
+      "grad_norm": 1.3320307314330602,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 18272
+    },
+    {
+      "epoch": 0.18273,
+      "grad_norm": 1.3135210430179733,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 18273
+    },
+    {
+      "epoch": 0.18274,
+      "grad_norm": 1.1245856238003318,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 18274
+    },
+    {
+      "epoch": 0.18275,
+      "grad_norm": 1.447129762793166,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 18275
+    },
+    {
+      "epoch": 0.18276,
+      "grad_norm": 1.1511065778756517,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 18276
+    },
+    {
+      "epoch": 0.18277,
+      "grad_norm": 1.4510257946581455,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 18277
+    },
+    {
+      "epoch": 0.18278,
+      "grad_norm": 1.268295122455787,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 18278
+    },
+    {
+      "epoch": 0.18279,
+      "grad_norm": 1.2968887235526405,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 18279
+    },
+    {
+      "epoch": 0.1828,
+      "grad_norm": 1.3690968920432314,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 18280
+    },
+    {
+      "epoch": 0.18281,
+      "grad_norm": 1.2938902793547082,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 18281
+    },
+    {
+      "epoch": 0.18282,
+      "grad_norm": 1.1835470553092178,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 18282
+    },
+    {
+      "epoch": 0.18283,
+      "grad_norm": 1.1957265377601571,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 18283
+    },
+    {
+      "epoch": 0.18284,
+      "grad_norm": 1.391943196815868,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 18284
+    },
+    {
+      "epoch": 0.18285,
+      "grad_norm": 1.107369458028556,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 18285
+    },
+    {
+      "epoch": 0.18286,
+      "grad_norm": 1.1283881078129234,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 18286
+    },
+    {
+      "epoch": 0.18287,
+      "grad_norm": 1.4298950698020385,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 18287
+    },
+    {
+      "epoch": 0.18288,
+      "grad_norm": 1.0464529595741836,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 18288
+    },
+    {
+      "epoch": 0.18289,
+      "grad_norm": 1.5449404053034377,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 18289
+    },
+    {
+      "epoch": 0.1829,
+      "grad_norm": 1.1599745083921265,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 18290
+    },
+    {
+      "epoch": 0.18291,
+      "grad_norm": 1.3878360335531605,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 18291
+    },
+    {
+      "epoch": 0.18292,
+      "grad_norm": 1.128394557326477,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 18292
+    },
+    {
+      "epoch": 0.18293,
+      "grad_norm": 1.2015910000500716,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 18293
+    },
+    {
+      "epoch": 0.18294,
+      "grad_norm": 1.495744620608908,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 18294
+    },
+    {
+      "epoch": 0.18295,
+      "grad_norm": 1.188194593122797,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 18295
+    },
+    {
+      "epoch": 0.18296,
+      "grad_norm": 1.5503573027787667,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 18296
+    },
+    {
+      "epoch": 0.18297,
+      "grad_norm": 1.0139028798028031,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 18297
+    },
+    {
+      "epoch": 0.18298,
+      "grad_norm": 1.5246622301862984,
+      "learning_rate": 0.003,
+      "loss": 4.0565,
+      "step": 18298
+    },
+    {
+      "epoch": 0.18299,
+      "grad_norm": 0.971631792285627,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 18299
+    },
+    {
+      "epoch": 0.183,
+      "grad_norm": 1.2882693419149442,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 18300
+    },
+    {
+      "epoch": 0.18301,
+      "grad_norm": 1.295287202260425,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 18301
+    },
+    {
+      "epoch": 0.18302,
+      "grad_norm": 1.2151581662507234,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 18302
+    },
+    {
+      "epoch": 0.18303,
+      "grad_norm": 1.2384063271278432,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 18303
+    },
+    {
+      "epoch": 0.18304,
+      "grad_norm": 1.383694473007289,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 18304
+    },
+    {
+      "epoch": 0.18305,
+      "grad_norm": 1.1996409587123917,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 18305
+    },
+    {
+      "epoch": 0.18306,
+      "grad_norm": 1.2824445651452854,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 18306
+    },
+    {
+      "epoch": 0.18307,
+      "grad_norm": 1.2826231555044272,
+      "learning_rate": 0.003,
+      "loss": 3.9841,
+      "step": 18307
+    },
+    {
+      "epoch": 0.18308,
+      "grad_norm": 1.3253483334986864,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 18308
+    },
+    {
+      "epoch": 0.18309,
+      "grad_norm": 1.3339497408438519,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 18309
+    },
+    {
+      "epoch": 0.1831,
+      "grad_norm": 1.335636097592298,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 18310
+    },
+    {
+      "epoch": 0.18311,
+      "grad_norm": 1.201242319528529,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 18311
+    },
+    {
+      "epoch": 0.18312,
+      "grad_norm": 1.202663178066936,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 18312
+    },
+    {
+      "epoch": 0.18313,
+      "grad_norm": 1.0739801360247447,
+      "learning_rate": 0.003,
+      "loss": 3.9366,
+      "step": 18313
+    },
+    {
+      "epoch": 0.18314,
+      "grad_norm": 1.3549682080063843,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 18314
+    },
+    {
+      "epoch": 0.18315,
+      "grad_norm": 1.2451047465446101,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 18315
+    },
+    {
+      "epoch": 0.18316,
+      "grad_norm": 1.1503352607657136,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 18316
+    },
+    {
+      "epoch": 0.18317,
+      "grad_norm": 1.4214259749166296,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 18317
+    },
+    {
+      "epoch": 0.18318,
+      "grad_norm": 1.1636299959381882,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 18318
+    },
+    {
+      "epoch": 0.18319,
+      "grad_norm": 1.637779383067376,
+      "learning_rate": 0.003,
+      "loss": 3.9762,
+      "step": 18319
+    },
+    {
+      "epoch": 0.1832,
+      "grad_norm": 1.1294077294081348,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 18320
+    },
+    {
+      "epoch": 0.18321,
+      "grad_norm": 1.595618162969652,
+      "learning_rate": 0.003,
+      "loss": 4.0799,
+      "step": 18321
+    },
+    {
+      "epoch": 0.18322,
+      "grad_norm": 1.385932302929975,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 18322
+    },
+    {
+      "epoch": 0.18323,
+      "grad_norm": 1.148155339367707,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 18323
+    },
+    {
+      "epoch": 0.18324,
+      "grad_norm": 1.2766032145718194,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 18324
+    },
+    {
+      "epoch": 0.18325,
+      "grad_norm": 1.348066500177508,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 18325
+    },
+    {
+      "epoch": 0.18326,
+      "grad_norm": 1.1033829098676295,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 18326
+    },
+    {
+      "epoch": 0.18327,
+      "grad_norm": 1.594138909184963,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 18327
+    },
+    {
+      "epoch": 0.18328,
+      "grad_norm": 0.9913174047152843,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 18328
+    },
+    {
+      "epoch": 0.18329,
+      "grad_norm": 1.451935960013849,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 18329
+    },
+    {
+      "epoch": 0.1833,
+      "grad_norm": 1.1632710985775636,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 18330
+    },
+    {
+      "epoch": 0.18331,
+      "grad_norm": 1.4401507582279762,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 18331
+    },
+    {
+      "epoch": 0.18332,
+      "grad_norm": 1.134330387221328,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 18332
+    },
+    {
+      "epoch": 0.18333,
+      "grad_norm": 1.406834878988515,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 18333
+    },
+    {
+      "epoch": 0.18334,
+      "grad_norm": 1.2369064747229477,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 18334
+    },
+    {
+      "epoch": 0.18335,
+      "grad_norm": 1.1604113149220496,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 18335
+    },
+    {
+      "epoch": 0.18336,
+      "grad_norm": 1.3460719303584274,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 18336
+    },
+    {
+      "epoch": 0.18337,
+      "grad_norm": 1.2055253712252243,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 18337
+    },
+    {
+      "epoch": 0.18338,
+      "grad_norm": 1.252502635335824,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 18338
+    },
+    {
+      "epoch": 0.18339,
+      "grad_norm": 1.1572220045392678,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 18339
+    },
+    {
+      "epoch": 0.1834,
+      "grad_norm": 1.3235373092444123,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 18340
+    },
+    {
+      "epoch": 0.18341,
+      "grad_norm": 1.2542925521645085,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 18341
+    },
+    {
+      "epoch": 0.18342,
+      "grad_norm": 1.2525317962625822,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 18342
+    },
+    {
+      "epoch": 0.18343,
+      "grad_norm": 1.23293947973228,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 18343
+    },
+    {
+      "epoch": 0.18344,
+      "grad_norm": 1.2771981048908279,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 18344
+    },
+    {
+      "epoch": 0.18345,
+      "grad_norm": 1.24303441554185,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 18345
+    },
+    {
+      "epoch": 0.18346,
+      "grad_norm": 1.1896657466868845,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 18346
+    },
+    {
+      "epoch": 0.18347,
+      "grad_norm": 1.3315160070580436,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 18347
+    },
+    {
+      "epoch": 0.18348,
+      "grad_norm": 1.1130315426915258,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 18348
+    },
+    {
+      "epoch": 0.18349,
+      "grad_norm": 1.534654876215884,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 18349
+    },
+    {
+      "epoch": 0.1835,
+      "grad_norm": 1.091582673242823,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 18350
+    },
+    {
+      "epoch": 0.18351,
+      "grad_norm": 1.236814118415645,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 18351
+    },
+    {
+      "epoch": 0.18352,
+      "grad_norm": 1.1432737189332145,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 18352
+    },
+    {
+      "epoch": 0.18353,
+      "grad_norm": 1.3846882584393398,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 18353
+    },
+    {
+      "epoch": 0.18354,
+      "grad_norm": 1.3679455609199003,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 18354
+    },
+    {
+      "epoch": 0.18355,
+      "grad_norm": 1.2603503593575665,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 18355
+    },
+    {
+      "epoch": 0.18356,
+      "grad_norm": 1.1296201645828623,
+      "learning_rate": 0.003,
+      "loss": 3.9827,
+      "step": 18356
+    },
+    {
+      "epoch": 0.18357,
+      "grad_norm": 1.3538616687265028,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 18357
+    },
+    {
+      "epoch": 0.18358,
+      "grad_norm": 1.3092564527192359,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 18358
+    },
+    {
+      "epoch": 0.18359,
+      "grad_norm": 1.4992074553152586,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 18359
+    },
+    {
+      "epoch": 0.1836,
+      "grad_norm": 0.9900264088805295,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 18360
+    },
+    {
+      "epoch": 0.18361,
+      "grad_norm": 1.4370949498241343,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 18361
+    },
+    {
+      "epoch": 0.18362,
+      "grad_norm": 1.1886338472278208,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 18362
+    },
+    {
+      "epoch": 0.18363,
+      "grad_norm": 1.2032449147560684,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 18363
+    },
+    {
+      "epoch": 0.18364,
+      "grad_norm": 1.4954201193748469,
+      "learning_rate": 0.003,
+      "loss": 4.0545,
+      "step": 18364
+    },
+    {
+      "epoch": 0.18365,
+      "grad_norm": 1.1318946680178235,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 18365
+    },
+    {
+      "epoch": 0.18366,
+      "grad_norm": 1.4399574101203345,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 18366
+    },
+    {
+      "epoch": 0.18367,
+      "grad_norm": 1.1804648048962672,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 18367
+    },
+    {
+      "epoch": 0.18368,
+      "grad_norm": 1.5449144085447852,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 18368
+    },
+    {
+      "epoch": 0.18369,
+      "grad_norm": 1.3209377674395755,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 18369
+    },
+    {
+      "epoch": 0.1837,
+      "grad_norm": 1.3595374939224947,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 18370
+    },
+    {
+      "epoch": 0.18371,
+      "grad_norm": 1.370017133050456,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 18371
+    },
+    {
+      "epoch": 0.18372,
+      "grad_norm": 1.2081595883407212,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 18372
+    },
+    {
+      "epoch": 0.18373,
+      "grad_norm": 1.3355722930673857,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 18373
+    },
+    {
+      "epoch": 0.18374,
+      "grad_norm": 1.1087151728729547,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 18374
+    },
+    {
+      "epoch": 0.18375,
+      "grad_norm": 1.3750018845575622,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 18375
+    },
+    {
+      "epoch": 0.18376,
+      "grad_norm": 1.1526653119120704,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 18376
+    },
+    {
+      "epoch": 0.18377,
+      "grad_norm": 1.3548582332658599,
+      "learning_rate": 0.003,
+      "loss": 3.9867,
+      "step": 18377
+    },
+    {
+      "epoch": 0.18378,
+      "grad_norm": 1.2034073123719202,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 18378
+    },
+    {
+      "epoch": 0.18379,
+      "grad_norm": 1.2645937649587442,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 18379
+    },
+    {
+      "epoch": 0.1838,
+      "grad_norm": 1.2897176376196136,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 18380
+    },
+    {
+      "epoch": 0.18381,
+      "grad_norm": 1.1275837421245836,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 18381
+    },
+    {
+      "epoch": 0.18382,
+      "grad_norm": 1.2978066880951926,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 18382
+    },
+    {
+      "epoch": 0.18383,
+      "grad_norm": 1.128515129388203,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 18383
+    },
+    {
+      "epoch": 0.18384,
+      "grad_norm": 1.3590720577900184,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 18384
+    },
+    {
+      "epoch": 0.18385,
+      "grad_norm": 1.2900152539991172,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 18385
+    },
+    {
+      "epoch": 0.18386,
+      "grad_norm": 1.2926998050864664,
+      "learning_rate": 0.003,
+      "loss": 3.9859,
+      "step": 18386
+    },
+    {
+      "epoch": 0.18387,
+      "grad_norm": 1.3083233474391918,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 18387
+    },
+    {
+      "epoch": 0.18388,
+      "grad_norm": 1.2745656581962876,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 18388
+    },
+    {
+      "epoch": 0.18389,
+      "grad_norm": 1.1794493601314162,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 18389
+    },
+    {
+      "epoch": 0.1839,
+      "grad_norm": 1.3066139417195313,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 18390
+    },
+    {
+      "epoch": 0.18391,
+      "grad_norm": 1.4504619624419817,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 18391
+    },
+    {
+      "epoch": 0.18392,
+      "grad_norm": 1.3341319042643156,
+      "learning_rate": 0.003,
+      "loss": 3.9837,
+      "step": 18392
+    },
+    {
+      "epoch": 0.18393,
+      "grad_norm": 1.3105393538818524,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 18393
+    },
+    {
+      "epoch": 0.18394,
+      "grad_norm": 1.4607388669237813,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 18394
+    },
+    {
+      "epoch": 0.18395,
+      "grad_norm": 0.9995550105643105,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 18395
+    },
+    {
+      "epoch": 0.18396,
+      "grad_norm": 1.384886578542871,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 18396
+    },
+    {
+      "epoch": 0.18397,
+      "grad_norm": 1.031830825123548,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 18397
+    },
+    {
+      "epoch": 0.18398,
+      "grad_norm": 1.4989640568496108,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 18398
+    },
+    {
+      "epoch": 0.18399,
+      "grad_norm": 1.2320834742620708,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 18399
+    },
+    {
+      "epoch": 0.184,
+      "grad_norm": 1.1692123907725664,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 18400
+    },
+    {
+      "epoch": 0.18401,
+      "grad_norm": 1.3031665663242062,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 18401
+    },
+    {
+      "epoch": 0.18402,
+      "grad_norm": 1.1249323012008554,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 18402
+    },
+    {
+      "epoch": 0.18403,
+      "grad_norm": 1.198204293360691,
+      "learning_rate": 0.003,
+      "loss": 3.9766,
+      "step": 18403
+    },
+    {
+      "epoch": 0.18404,
+      "grad_norm": 1.4103508443276054,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 18404
+    },
+    {
+      "epoch": 0.18405,
+      "grad_norm": 1.2898062167184823,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 18405
+    },
+    {
+      "epoch": 0.18406,
+      "grad_norm": 1.3525117916984277,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 18406
+    },
+    {
+      "epoch": 0.18407,
+      "grad_norm": 1.3484237042989866,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 18407
+    },
+    {
+      "epoch": 0.18408,
+      "grad_norm": 1.4461309714025212,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 18408
+    },
+    {
+      "epoch": 0.18409,
+      "grad_norm": 1.2975573996791996,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 18409
+    },
+    {
+      "epoch": 0.1841,
+      "grad_norm": 1.2114947855094296,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 18410
+    },
+    {
+      "epoch": 0.18411,
+      "grad_norm": 1.2452581120611896,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 18411
+    },
+    {
+      "epoch": 0.18412,
+      "grad_norm": 1.470607565911789,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 18412
+    },
+    {
+      "epoch": 0.18413,
+      "grad_norm": 1.2262106788202565,
+      "learning_rate": 0.003,
+      "loss": 3.9938,
+      "step": 18413
+    },
+    {
+      "epoch": 0.18414,
+      "grad_norm": 1.2580826518725825,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 18414
+    },
+    {
+      "epoch": 0.18415,
+      "grad_norm": 1.2762030597233442,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 18415
+    },
+    {
+      "epoch": 0.18416,
+      "grad_norm": 1.3006937132738488,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 18416
+    },
+    {
+      "epoch": 0.18417,
+      "grad_norm": 1.1144728301235787,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 18417
+    },
+    {
+      "epoch": 0.18418,
+      "grad_norm": 1.2882672348453161,
+      "learning_rate": 0.003,
+      "loss": 3.9854,
+      "step": 18418
+    },
+    {
+      "epoch": 0.18419,
+      "grad_norm": 1.2156632443483395,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 18419
+    },
+    {
+      "epoch": 0.1842,
+      "grad_norm": 1.275090643963137,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 18420
+    },
+    {
+      "epoch": 0.18421,
+      "grad_norm": 1.1046831206828236,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 18421
+    },
+    {
+      "epoch": 0.18422,
+      "grad_norm": 1.458164902379381,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 18422
+    },
+    {
+      "epoch": 0.18423,
+      "grad_norm": 1.2523878984847892,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 18423
+    },
+    {
+      "epoch": 0.18424,
+      "grad_norm": 1.2214335287255744,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 18424
+    },
+    {
+      "epoch": 0.18425,
+      "grad_norm": 1.298262007903684,
+      "learning_rate": 0.003,
+      "loss": 3.9707,
+      "step": 18425
+    },
+    {
+      "epoch": 0.18426,
+      "grad_norm": 1.297408437181403,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 18426
+    },
+    {
+      "epoch": 0.18427,
+      "grad_norm": 1.4194071833600868,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 18427
+    },
+    {
+      "epoch": 0.18428,
+      "grad_norm": 1.175283425110903,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 18428
+    },
+    {
+      "epoch": 0.18429,
+      "grad_norm": 1.1551904179812944,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 18429
+    },
+    {
+      "epoch": 0.1843,
+      "grad_norm": 1.2026732845787054,
+      "learning_rate": 0.003,
+      "loss": 3.983,
+      "step": 18430
+    },
+    {
+      "epoch": 0.18431,
+      "grad_norm": 1.2434555943262482,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 18431
+    },
+    {
+      "epoch": 0.18432,
+      "grad_norm": 1.1768527459774172,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 18432
+    },
+    {
+      "epoch": 0.18433,
+      "grad_norm": 1.240412598853343,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 18433
+    },
+    {
+      "epoch": 0.18434,
+      "grad_norm": 1.2692967701726239,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 18434
+    },
+    {
+      "epoch": 0.18435,
+      "grad_norm": 1.4220878500122178,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 18435
+    },
+    {
+      "epoch": 0.18436,
+      "grad_norm": 1.1348401403953556,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 18436
+    },
+    {
+      "epoch": 0.18437,
+      "grad_norm": 1.2374013020325947,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 18437
+    },
+    {
+      "epoch": 0.18438,
+      "grad_norm": 1.2858501719262228,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 18438
+    },
+    {
+      "epoch": 0.18439,
+      "grad_norm": 1.156999599660775,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 18439
+    },
+    {
+      "epoch": 0.1844,
+      "grad_norm": 1.3274603523403214,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 18440
+    },
+    {
+      "epoch": 0.18441,
+      "grad_norm": 1.2713213342559737,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 18441
+    },
+    {
+      "epoch": 0.18442,
+      "grad_norm": 1.3468046555432271,
+      "learning_rate": 0.003,
+      "loss": 3.9723,
+      "step": 18442
+    },
+    {
+      "epoch": 0.18443,
+      "grad_norm": 1.3106291424707803,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 18443
+    },
+    {
+      "epoch": 0.18444,
+      "grad_norm": 1.250152339274203,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 18444
+    },
+    {
+      "epoch": 0.18445,
+      "grad_norm": 1.4384507501738175,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 18445
+    },
+    {
+      "epoch": 0.18446,
+      "grad_norm": 1.227753855892706,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 18446
+    },
+    {
+      "epoch": 0.18447,
+      "grad_norm": 1.3188968565202013,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 18447
+    },
+    {
+      "epoch": 0.18448,
+      "grad_norm": 1.3824282906882581,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 18448
+    },
+    {
+      "epoch": 0.18449,
+      "grad_norm": 1.1178579815144731,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 18449
+    },
+    {
+      "epoch": 0.1845,
+      "grad_norm": 1.4560887414596302,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 18450
+    },
+    {
+      "epoch": 0.18451,
+      "grad_norm": 0.9358257931900706,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 18451
+    },
+    {
+      "epoch": 0.18452,
+      "grad_norm": 1.4848874911550862,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 18452
+    },
+    {
+      "epoch": 0.18453,
+      "grad_norm": 1.338522971259659,
+      "learning_rate": 0.003,
+      "loss": 4.057,
+      "step": 18453
+    },
+    {
+      "epoch": 0.18454,
+      "grad_norm": 1.4725522007476801,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 18454
+    },
+    {
+      "epoch": 0.18455,
+      "grad_norm": 1.1681160129084804,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 18455
+    },
+    {
+      "epoch": 0.18456,
+      "grad_norm": 1.327267783294372,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 18456
+    },
+    {
+      "epoch": 0.18457,
+      "grad_norm": 1.2578832106370543,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 18457
+    },
+    {
+      "epoch": 0.18458,
+      "grad_norm": 1.348036752864334,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 18458
+    },
+    {
+      "epoch": 0.18459,
+      "grad_norm": 1.3769169085120323,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 18459
+    },
+    {
+      "epoch": 0.1846,
+      "grad_norm": 1.7663306920162887,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 18460
+    },
+    {
+      "epoch": 0.18461,
+      "grad_norm": 1.1346456245594003,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 18461
+    },
+    {
+      "epoch": 0.18462,
+      "grad_norm": 1.2327644341051074,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 18462
+    },
+    {
+      "epoch": 0.18463,
+      "grad_norm": 1.1281774861720366,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 18463
+    },
+    {
+      "epoch": 0.18464,
+      "grad_norm": 1.2859152465019796,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 18464
+    },
+    {
+      "epoch": 0.18465,
+      "grad_norm": 1.3017491811776885,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 18465
+    },
+    {
+      "epoch": 0.18466,
+      "grad_norm": 1.2218602647698296,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 18466
+    },
+    {
+      "epoch": 0.18467,
+      "grad_norm": 1.3017897342152858,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 18467
+    },
+    {
+      "epoch": 0.18468,
+      "grad_norm": 1.1866958547635929,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 18468
+    },
+    {
+      "epoch": 0.18469,
+      "grad_norm": 1.1458159820620728,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 18469
+    },
+    {
+      "epoch": 0.1847,
+      "grad_norm": 1.2302168602397137,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 18470
+    },
+    {
+      "epoch": 0.18471,
+      "grad_norm": 1.3917697686622776,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 18471
+    },
+    {
+      "epoch": 0.18472,
+      "grad_norm": 1.1051796677592909,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 18472
+    },
+    {
+      "epoch": 0.18473,
+      "grad_norm": 1.4759237634105464,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 18473
+    },
+    {
+      "epoch": 0.18474,
+      "grad_norm": 1.4656747300837214,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 18474
+    },
+    {
+      "epoch": 0.18475,
+      "grad_norm": 1.299117857979297,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 18475
+    },
+    {
+      "epoch": 0.18476,
+      "grad_norm": 1.238280623167555,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 18476
+    },
+    {
+      "epoch": 0.18477,
+      "grad_norm": 1.4048847403795164,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 18477
+    },
+    {
+      "epoch": 0.18478,
+      "grad_norm": 1.1090434443199049,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 18478
+    },
+    {
+      "epoch": 0.18479,
+      "grad_norm": 1.4213046723745388,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 18479
+    },
+    {
+      "epoch": 0.1848,
+      "grad_norm": 1.279818057568252,
+      "learning_rate": 0.003,
+      "loss": 3.9754,
+      "step": 18480
+    },
+    {
+      "epoch": 0.18481,
+      "grad_norm": 1.3643906040670437,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 18481
+    },
+    {
+      "epoch": 0.18482,
+      "grad_norm": 1.331545735966065,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 18482
+    },
+    {
+      "epoch": 0.18483,
+      "grad_norm": 1.4582865434308194,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 18483
+    },
+    {
+      "epoch": 0.18484,
+      "grad_norm": 1.1033483229133811,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 18484
+    },
+    {
+      "epoch": 0.18485,
+      "grad_norm": 1.220863438035,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 18485
+    },
+    {
+      "epoch": 0.18486,
+      "grad_norm": 1.1819377221490115,
+      "learning_rate": 0.003,
+      "loss": 3.97,
+      "step": 18486
+    },
+    {
+      "epoch": 0.18487,
+      "grad_norm": 1.2913761031851838,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 18487
+    },
+    {
+      "epoch": 0.18488,
+      "grad_norm": 1.3534671638086488,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 18488
+    },
+    {
+      "epoch": 0.18489,
+      "grad_norm": 1.4161610107981784,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 18489
+    },
+    {
+      "epoch": 0.1849,
+      "grad_norm": 1.1101131904508508,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 18490
+    },
+    {
+      "epoch": 0.18491,
+      "grad_norm": 1.4841512493611402,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 18491
+    },
+    {
+      "epoch": 0.18492,
+      "grad_norm": 1.075679689426423,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 18492
+    },
+    {
+      "epoch": 0.18493,
+      "grad_norm": 1.5403311468925576,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 18493
+    },
+    {
+      "epoch": 0.18494,
+      "grad_norm": 1.0948628348607796,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 18494
+    },
+    {
+      "epoch": 0.18495,
+      "grad_norm": 1.5168282201704515,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 18495
+    },
+    {
+      "epoch": 0.18496,
+      "grad_norm": 1.066571763307485,
+      "learning_rate": 0.003,
+      "loss": 3.9701,
+      "step": 18496
+    },
+    {
+      "epoch": 0.18497,
+      "grad_norm": 1.4482722506631425,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 18497
+    },
+    {
+      "epoch": 0.18498,
+      "grad_norm": 1.3925698544647553,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 18498
+    },
+    {
+      "epoch": 0.18499,
+      "grad_norm": 1.407952620323706,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 18499
+    },
+    {
+      "epoch": 0.185,
+      "grad_norm": 1.2013361484565197,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 18500
+    },
+    {
+      "epoch": 0.18501,
+      "grad_norm": 1.358993966037197,
+      "learning_rate": 0.003,
+      "loss": 3.9786,
+      "step": 18501
+    },
+    {
+      "epoch": 0.18502,
+      "grad_norm": 1.251426452479606,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 18502
+    },
+    {
+      "epoch": 0.18503,
+      "grad_norm": 1.3535328239988025,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 18503
+    },
+    {
+      "epoch": 0.18504,
+      "grad_norm": 1.2846308012873837,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 18504
+    },
+    {
+      "epoch": 0.18505,
+      "grad_norm": 1.3831662011923265,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 18505
+    },
+    {
+      "epoch": 0.18506,
+      "grad_norm": 1.3756100946984784,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 18506
+    },
+    {
+      "epoch": 0.18507,
+      "grad_norm": 1.2082064307048737,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 18507
+    },
+    {
+      "epoch": 0.18508,
+      "grad_norm": 1.2346584097097666,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 18508
+    },
+    {
+      "epoch": 0.18509,
+      "grad_norm": 1.1118011345861378,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 18509
+    },
+    {
+      "epoch": 0.1851,
+      "grad_norm": 1.4618268255864204,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 18510
+    },
+    {
+      "epoch": 0.18511,
+      "grad_norm": 1.330748768986842,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 18511
+    },
+    {
+      "epoch": 0.18512,
+      "grad_norm": 1.1974218340672207,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 18512
+    },
+    {
+      "epoch": 0.18513,
+      "grad_norm": 1.3318792603017142,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 18513
+    },
+    {
+      "epoch": 0.18514,
+      "grad_norm": 1.0947871098533344,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 18514
+    },
+    {
+      "epoch": 0.18515,
+      "grad_norm": 1.4433388959752576,
+      "learning_rate": 0.003,
+      "loss": 3.972,
+      "step": 18515
+    },
+    {
+      "epoch": 0.18516,
+      "grad_norm": 1.061057613490036,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 18516
+    },
+    {
+      "epoch": 0.18517,
+      "grad_norm": 1.4735050302149566,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 18517
+    },
+    {
+      "epoch": 0.18518,
+      "grad_norm": 1.1761057438168303,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 18518
+    },
+    {
+      "epoch": 0.18519,
+      "grad_norm": 1.3637153392472225,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 18519
+    },
+    {
+      "epoch": 0.1852,
+      "grad_norm": 0.9907075153743939,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 18520
+    },
+    {
+      "epoch": 0.18521,
+      "grad_norm": 1.4587027918044375,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 18521
+    },
+    {
+      "epoch": 0.18522,
+      "grad_norm": 1.065505001624872,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 18522
+    },
+    {
+      "epoch": 0.18523,
+      "grad_norm": 1.2618717307106824,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 18523
+    },
+    {
+      "epoch": 0.18524,
+      "grad_norm": 1.2549102381618809,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 18524
+    },
+    {
+      "epoch": 0.18525,
+      "grad_norm": 1.1756058585589122,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 18525
+    },
+    {
+      "epoch": 0.18526,
+      "grad_norm": 1.308331068971566,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 18526
+    },
+    {
+      "epoch": 0.18527,
+      "grad_norm": 1.2308889043734412,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 18527
+    },
+    {
+      "epoch": 0.18528,
+      "grad_norm": 1.341743389444327,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 18528
+    },
+    {
+      "epoch": 0.18529,
+      "grad_norm": 1.2788693846335635,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 18529
+    },
+    {
+      "epoch": 0.1853,
+      "grad_norm": 1.212905158240625,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 18530
+    },
+    {
+      "epoch": 0.18531,
+      "grad_norm": 1.2585666115433096,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 18531
+    },
+    {
+      "epoch": 0.18532,
+      "grad_norm": 1.6025444100201056,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 18532
+    },
+    {
+      "epoch": 0.18533,
+      "grad_norm": 1.2214621764586233,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 18533
+    },
+    {
+      "epoch": 0.18534,
+      "grad_norm": 1.430485782386072,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 18534
+    },
+    {
+      "epoch": 0.18535,
+      "grad_norm": 1.3164521462885879,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 18535
+    },
+    {
+      "epoch": 0.18536,
+      "grad_norm": 1.276515990912035,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 18536
+    },
+    {
+      "epoch": 0.18537,
+      "grad_norm": 1.2701183695610685,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 18537
+    },
+    {
+      "epoch": 0.18538,
+      "grad_norm": 1.1189715005336927,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 18538
+    },
+    {
+      "epoch": 0.18539,
+      "grad_norm": 1.3619235261804277,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 18539
+    },
+    {
+      "epoch": 0.1854,
+      "grad_norm": 1.233576204494665,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 18540
+    },
+    {
+      "epoch": 0.18541,
+      "grad_norm": 1.3692736931002547,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 18541
+    },
+    {
+      "epoch": 0.18542,
+      "grad_norm": 1.0912947060898697,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 18542
+    },
+    {
+      "epoch": 0.18543,
+      "grad_norm": 1.4822550236144991,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 18543
+    },
+    {
+      "epoch": 0.18544,
+      "grad_norm": 1.0481702315363652,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 18544
+    },
+    {
+      "epoch": 0.18545,
+      "grad_norm": 1.3190514168279799,
+      "learning_rate": 0.003,
+      "loss": 3.9745,
+      "step": 18545
+    },
+    {
+      "epoch": 0.18546,
+      "grad_norm": 1.1342856434951607,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 18546
+    },
+    {
+      "epoch": 0.18547,
+      "grad_norm": 1.4563981963795782,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 18547
+    },
+    {
+      "epoch": 0.18548,
+      "grad_norm": 1.1505326612441675,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 18548
+    },
+    {
+      "epoch": 0.18549,
+      "grad_norm": 1.348571565178306,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 18549
+    },
+    {
+      "epoch": 0.1855,
+      "grad_norm": 1.2459203506838015,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 18550
+    },
+    {
+      "epoch": 0.18551,
+      "grad_norm": 1.3952337410832205,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 18551
+    },
+    {
+      "epoch": 0.18552,
+      "grad_norm": 1.2513571202488907,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 18552
+    },
+    {
+      "epoch": 0.18553,
+      "grad_norm": 1.308467551674208,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 18553
+    },
+    {
+      "epoch": 0.18554,
+      "grad_norm": 1.0101160805280176,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 18554
+    },
+    {
+      "epoch": 0.18555,
+      "grad_norm": 1.201354445868851,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 18555
+    },
+    {
+      "epoch": 0.18556,
+      "grad_norm": 1.3008756176695497,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 18556
+    },
+    {
+      "epoch": 0.18557,
+      "grad_norm": 1.1840527587871843,
+      "learning_rate": 0.003,
+      "loss": 3.9809,
+      "step": 18557
+    },
+    {
+      "epoch": 0.18558,
+      "grad_norm": 1.5502327699695684,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 18558
+    },
+    {
+      "epoch": 0.18559,
+      "grad_norm": 1.1948999804123228,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 18559
+    },
+    {
+      "epoch": 0.1856,
+      "grad_norm": 1.2310717704591958,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 18560
+    },
+    {
+      "epoch": 0.18561,
+      "grad_norm": 1.2907980238799903,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 18561
+    },
+    {
+      "epoch": 0.18562,
+      "grad_norm": 1.3389978550625135,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 18562
+    },
+    {
+      "epoch": 0.18563,
+      "grad_norm": 1.2244157868322538,
+      "learning_rate": 0.003,
+      "loss": 3.9791,
+      "step": 18563
+    },
+    {
+      "epoch": 0.18564,
+      "grad_norm": 1.1847773023779853,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 18564
+    },
+    {
+      "epoch": 0.18565,
+      "grad_norm": 1.5214512122252601,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 18565
+    },
+    {
+      "epoch": 0.18566,
+      "grad_norm": 1.0766529054176561,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 18566
+    },
+    {
+      "epoch": 0.18567,
+      "grad_norm": 1.4010076254541792,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 18567
+    },
+    {
+      "epoch": 0.18568,
+      "grad_norm": 0.991739314032095,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 18568
+    },
+    {
+      "epoch": 0.18569,
+      "grad_norm": 1.2838558466306362,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 18569
+    },
+    {
+      "epoch": 0.1857,
+      "grad_norm": 1.220292414323158,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 18570
+    },
+    {
+      "epoch": 0.18571,
+      "grad_norm": 1.322845412629368,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 18571
+    },
+    {
+      "epoch": 0.18572,
+      "grad_norm": 1.3582577169073362,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 18572
+    },
+    {
+      "epoch": 0.18573,
+      "grad_norm": 1.200184426982225,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 18573
+    },
+    {
+      "epoch": 0.18574,
+      "grad_norm": 1.5527522075179154,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 18574
+    },
+    {
+      "epoch": 0.18575,
+      "grad_norm": 1.1582205921040074,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 18575
+    },
+    {
+      "epoch": 0.18576,
+      "grad_norm": 1.2330870377840064,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 18576
+    },
+    {
+      "epoch": 0.18577,
+      "grad_norm": 1.2252907480898436,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 18577
+    },
+    {
+      "epoch": 0.18578,
+      "grad_norm": 1.023257381028075,
+      "learning_rate": 0.003,
+      "loss": 3.9839,
+      "step": 18578
+    },
+    {
+      "epoch": 0.18579,
+      "grad_norm": 1.4218033839458162,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 18579
+    },
+    {
+      "epoch": 0.1858,
+      "grad_norm": 1.3118197249151076,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 18580
+    },
+    {
+      "epoch": 0.18581,
+      "grad_norm": 1.3664557565587128,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 18581
+    },
+    {
+      "epoch": 0.18582,
+      "grad_norm": 1.4781456426824633,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 18582
+    },
+    {
+      "epoch": 0.18583,
+      "grad_norm": 1.128398572392948,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 18583
+    },
+    {
+      "epoch": 0.18584,
+      "grad_norm": 1.6347654062622279,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 18584
+    },
+    {
+      "epoch": 0.18585,
+      "grad_norm": 1.031502279419643,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 18585
+    },
+    {
+      "epoch": 0.18586,
+      "grad_norm": 1.433874910092499,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 18586
+    },
+    {
+      "epoch": 0.18587,
+      "grad_norm": 1.0733447413741035,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 18587
+    },
+    {
+      "epoch": 0.18588,
+      "grad_norm": 1.5475095622967026,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 18588
+    },
+    {
+      "epoch": 0.18589,
+      "grad_norm": 1.0194451496728019,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 18589
+    },
+    {
+      "epoch": 0.1859,
+      "grad_norm": 1.5307823062281203,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 18590
+    },
+    {
+      "epoch": 0.18591,
+      "grad_norm": 1.1275286995807612,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 18591
+    },
+    {
+      "epoch": 0.18592,
+      "grad_norm": 1.2933826175634457,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 18592
+    },
+    {
+      "epoch": 0.18593,
+      "grad_norm": 1.3601993739587592,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 18593
+    },
+    {
+      "epoch": 0.18594,
+      "grad_norm": 1.2130492766252166,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 18594
+    },
+    {
+      "epoch": 0.18595,
+      "grad_norm": 1.2934433237292364,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 18595
+    },
+    {
+      "epoch": 0.18596,
+      "grad_norm": 1.2421427960715283,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 18596
+    },
+    {
+      "epoch": 0.18597,
+      "grad_norm": 1.490141776937668,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 18597
+    },
+    {
+      "epoch": 0.18598,
+      "grad_norm": 1.3658652684554957,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 18598
+    },
+    {
+      "epoch": 0.18599,
+      "grad_norm": 1.286534104846973,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 18599
+    },
+    {
+      "epoch": 0.186,
+      "grad_norm": 1.2191421245523897,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 18600
+    },
+    {
+      "epoch": 0.18601,
+      "grad_norm": 1.3072430551357463,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 18601
+    },
+    {
+      "epoch": 0.18602,
+      "grad_norm": 1.242549894399749,
+      "learning_rate": 0.003,
+      "loss": 3.978,
+      "step": 18602
+    },
+    {
+      "epoch": 0.18603,
+      "grad_norm": 1.453242763100252,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 18603
+    },
+    {
+      "epoch": 0.18604,
+      "grad_norm": 1.2777779597545433,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 18604
+    },
+    {
+      "epoch": 0.18605,
+      "grad_norm": 1.2684022449348649,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 18605
+    },
+    {
+      "epoch": 0.18606,
+      "grad_norm": 1.1734346703901422,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 18606
+    },
+    {
+      "epoch": 0.18607,
+      "grad_norm": 1.4009858584521866,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 18607
+    },
+    {
+      "epoch": 0.18608,
+      "grad_norm": 0.9551117231543502,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 18608
+    },
+    {
+      "epoch": 0.18609,
+      "grad_norm": 1.3261185996717406,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 18609
+    },
+    {
+      "epoch": 0.1861,
+      "grad_norm": 1.0500612330987849,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 18610
+    },
+    {
+      "epoch": 0.18611,
+      "grad_norm": 1.792206931986703,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 18611
+    },
+    {
+      "epoch": 0.18612,
+      "grad_norm": 1.0644116367360816,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 18612
+    },
+    {
+      "epoch": 0.18613,
+      "grad_norm": 1.6239202401149886,
+      "learning_rate": 0.003,
+      "loss": 4.061,
+      "step": 18613
+    },
+    {
+      "epoch": 0.18614,
+      "grad_norm": 1.1228693236587073,
+      "learning_rate": 0.003,
+      "loss": 3.9776,
+      "step": 18614
+    },
+    {
+      "epoch": 0.18615,
+      "grad_norm": 1.3021924314613067,
+      "learning_rate": 0.003,
+      "loss": 4.0395,
+      "step": 18615
+    },
+    {
+      "epoch": 0.18616,
+      "grad_norm": 1.3454363448652122,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 18616
+    },
+    {
+      "epoch": 0.18617,
+      "grad_norm": 1.1555190229942982,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 18617
+    },
+    {
+      "epoch": 0.18618,
+      "grad_norm": 1.3870926743097056,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 18618
+    },
+    {
+      "epoch": 0.18619,
+      "grad_norm": 1.4728282282089995,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 18619
+    },
+    {
+      "epoch": 0.1862,
+      "grad_norm": 1.3300007177718682,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 18620
+    },
+    {
+      "epoch": 0.18621,
+      "grad_norm": 1.2998396089776643,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 18621
+    },
+    {
+      "epoch": 0.18622,
+      "grad_norm": 1.663830580793456,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 18622
+    },
+    {
+      "epoch": 0.18623,
+      "grad_norm": 1.1297681880161663,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 18623
+    },
+    {
+      "epoch": 0.18624,
+      "grad_norm": 1.3008520977812261,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 18624
+    },
+    {
+      "epoch": 0.18625,
+      "grad_norm": 1.2806620435849159,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 18625
+    },
+    {
+      "epoch": 0.18626,
+      "grad_norm": 1.3016566169943902,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 18626
+    },
+    {
+      "epoch": 0.18627,
+      "grad_norm": 1.2693203511028142,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 18627
+    },
+    {
+      "epoch": 0.18628,
+      "grad_norm": 1.2351005941004773,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 18628
+    },
+    {
+      "epoch": 0.18629,
+      "grad_norm": 1.296226250236291,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 18629
+    },
+    {
+      "epoch": 0.1863,
+      "grad_norm": 1.0927295505933619,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 18630
+    },
+    {
+      "epoch": 0.18631,
+      "grad_norm": 1.289378498409426,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 18631
+    },
+    {
+      "epoch": 0.18632,
+      "grad_norm": 1.3590661123448855,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 18632
+    },
+    {
+      "epoch": 0.18633,
+      "grad_norm": 1.1964437431765969,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 18633
+    },
+    {
+      "epoch": 0.18634,
+      "grad_norm": 1.3144287374617258,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 18634
+    },
+    {
+      "epoch": 0.18635,
+      "grad_norm": 1.0505604453924078,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 18635
+    },
+    {
+      "epoch": 0.18636,
+      "grad_norm": 1.470577508208389,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 18636
+    },
+    {
+      "epoch": 0.18637,
+      "grad_norm": 1.1033046640383908,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 18637
+    },
+    {
+      "epoch": 0.18638,
+      "grad_norm": 1.522697024614939,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 18638
+    },
+    {
+      "epoch": 0.18639,
+      "grad_norm": 1.2251818029759538,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 18639
+    },
+    {
+      "epoch": 0.1864,
+      "grad_norm": 1.3033259395285126,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 18640
+    },
+    {
+      "epoch": 0.18641,
+      "grad_norm": 1.2300175335879666,
+      "learning_rate": 0.003,
+      "loss": 3.9931,
+      "step": 18641
+    },
+    {
+      "epoch": 0.18642,
+      "grad_norm": 1.1259367697089997,
+      "learning_rate": 0.003,
+      "loss": 3.9864,
+      "step": 18642
+    },
+    {
+      "epoch": 0.18643,
+      "grad_norm": 1.444943076921912,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 18643
+    },
+    {
+      "epoch": 0.18644,
+      "grad_norm": 1.0268303432894565,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 18644
+    },
+    {
+      "epoch": 0.18645,
+      "grad_norm": 1.5852151526494689,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 18645
+    },
+    {
+      "epoch": 0.18646,
+      "grad_norm": 1.0043957113578743,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 18646
+    },
+    {
+      "epoch": 0.18647,
+      "grad_norm": 1.785965563009103,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 18647
+    },
+    {
+      "epoch": 0.18648,
+      "grad_norm": 1.0897731949512062,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 18648
+    },
+    {
+      "epoch": 0.18649,
+      "grad_norm": 1.1966497791830422,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 18649
+    },
+    {
+      "epoch": 0.1865,
+      "grad_norm": 1.1030508775016512,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 18650
+    },
+    {
+      "epoch": 0.18651,
+      "grad_norm": 1.4359024778859895,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 18651
+    },
+    {
+      "epoch": 0.18652,
+      "grad_norm": 1.3060847385318657,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 18652
+    },
+    {
+      "epoch": 0.18653,
+      "grad_norm": 1.2815378552222991,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 18653
+    },
+    {
+      "epoch": 0.18654,
+      "grad_norm": 1.3081912359863301,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 18654
+    },
+    {
+      "epoch": 0.18655,
+      "grad_norm": 1.1841226312470723,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 18655
+    },
+    {
+      "epoch": 0.18656,
+      "grad_norm": 1.0474684392139904,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 18656
+    },
+    {
+      "epoch": 0.18657,
+      "grad_norm": 1.2089374947444256,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 18657
+    },
+    {
+      "epoch": 0.18658,
+      "grad_norm": 1.2119073765559805,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 18658
+    },
+    {
+      "epoch": 0.18659,
+      "grad_norm": 1.2712102129241536,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 18659
+    },
+    {
+      "epoch": 0.1866,
+      "grad_norm": 1.3573528545499387,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 18660
+    },
+    {
+      "epoch": 0.18661,
+      "grad_norm": 1.4130170197773813,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 18661
+    },
+    {
+      "epoch": 0.18662,
+      "grad_norm": 1.1507026696311244,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 18662
+    },
+    {
+      "epoch": 0.18663,
+      "grad_norm": 1.5848939484272642,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 18663
+    },
+    {
+      "epoch": 0.18664,
+      "grad_norm": 1.3501497587585538,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 18664
+    },
+    {
+      "epoch": 0.18665,
+      "grad_norm": 1.3863001903757999,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 18665
+    },
+    {
+      "epoch": 0.18666,
+      "grad_norm": 1.2816916346464242,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 18666
+    },
+    {
+      "epoch": 0.18667,
+      "grad_norm": 1.0273145525259801,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 18667
+    },
+    {
+      "epoch": 0.18668,
+      "grad_norm": 1.4102810177452896,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 18668
+    },
+    {
+      "epoch": 0.18669,
+      "grad_norm": 1.0623139759039228,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 18669
+    },
+    {
+      "epoch": 0.1867,
+      "grad_norm": 1.4461637831636758,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 18670
+    },
+    {
+      "epoch": 0.18671,
+      "grad_norm": 1.0309532446981018,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 18671
+    },
+    {
+      "epoch": 0.18672,
+      "grad_norm": 1.5276803462303443,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 18672
+    },
+    {
+      "epoch": 0.18673,
+      "grad_norm": 0.9511090963111622,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 18673
+    },
+    {
+      "epoch": 0.18674,
+      "grad_norm": 1.301143672970116,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 18674
+    },
+    {
+      "epoch": 0.18675,
+      "grad_norm": 1.2147389803487254,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 18675
+    },
+    {
+      "epoch": 0.18676,
+      "grad_norm": 1.110001119192037,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 18676
+    },
+    {
+      "epoch": 0.18677,
+      "grad_norm": 1.5169873876232451,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 18677
+    },
+    {
+      "epoch": 0.18678,
+      "grad_norm": 1.30388104159054,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 18678
+    },
+    {
+      "epoch": 0.18679,
+      "grad_norm": 1.4226466874028336,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 18679
+    },
+    {
+      "epoch": 0.1868,
+      "grad_norm": 1.2521023613556785,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 18680
+    },
+    {
+      "epoch": 0.18681,
+      "grad_norm": 1.342154011332591,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 18681
+    },
+    {
+      "epoch": 0.18682,
+      "grad_norm": 1.151351223071168,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 18682
+    },
+    {
+      "epoch": 0.18683,
+      "grad_norm": 1.3783113488733716,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 18683
+    },
+    {
+      "epoch": 0.18684,
+      "grad_norm": 1.0337304440380508,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 18684
+    },
+    {
+      "epoch": 0.18685,
+      "grad_norm": 1.5841944883036514,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 18685
+    },
+    {
+      "epoch": 0.18686,
+      "grad_norm": 0.9641070777729437,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 18686
+    },
+    {
+      "epoch": 0.18687,
+      "grad_norm": 1.6626898746580234,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 18687
+    },
+    {
+      "epoch": 0.18688,
+      "grad_norm": 0.9977541504269526,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 18688
+    },
+    {
+      "epoch": 0.18689,
+      "grad_norm": 1.3963131002959657,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 18689
+    },
+    {
+      "epoch": 0.1869,
+      "grad_norm": 1.2682488828591068,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 18690
+    },
+    {
+      "epoch": 0.18691,
+      "grad_norm": 1.21170146686143,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 18691
+    },
+    {
+      "epoch": 0.18692,
+      "grad_norm": 1.4365591225461198,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 18692
+    },
+    {
+      "epoch": 0.18693,
+      "grad_norm": 1.1154666207606887,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 18693
+    },
+    {
+      "epoch": 0.18694,
+      "grad_norm": 1.4480910223178798,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 18694
+    },
+    {
+      "epoch": 0.18695,
+      "grad_norm": 1.5202125009114869,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 18695
+    },
+    {
+      "epoch": 0.18696,
+      "grad_norm": 1.0368577367197942,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 18696
+    },
+    {
+      "epoch": 0.18697,
+      "grad_norm": 1.528670472485098,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 18697
+    },
+    {
+      "epoch": 0.18698,
+      "grad_norm": 1.0822675430061508,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 18698
+    },
+    {
+      "epoch": 0.18699,
+      "grad_norm": 1.6733763431881195,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 18699
+    },
+    {
+      "epoch": 0.187,
+      "grad_norm": 1.0336614282538712,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 18700
+    },
+    {
+      "epoch": 0.18701,
+      "grad_norm": 1.306762222550216,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 18701
+    },
+    {
+      "epoch": 0.18702,
+      "grad_norm": 1.453126556274193,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 18702
+    },
+    {
+      "epoch": 0.18703,
+      "grad_norm": 1.0154430609976544,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 18703
+    },
+    {
+      "epoch": 0.18704,
+      "grad_norm": 1.5981658504580856,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 18704
+    },
+    {
+      "epoch": 0.18705,
+      "grad_norm": 1.0265576550733797,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 18705
+    },
+    {
+      "epoch": 0.18706,
+      "grad_norm": 1.3013077718481592,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 18706
+    },
+    {
+      "epoch": 0.18707,
+      "grad_norm": 1.265742895619668,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 18707
+    },
+    {
+      "epoch": 0.18708,
+      "grad_norm": 1.1374193006402677,
+      "learning_rate": 0.003,
+      "loss": 3.9854,
+      "step": 18708
+    },
+    {
+      "epoch": 0.18709,
+      "grad_norm": 1.3431087880743602,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 18709
+    },
+    {
+      "epoch": 0.1871,
+      "grad_norm": 1.314450011320492,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 18710
+    },
+    {
+      "epoch": 0.18711,
+      "grad_norm": 1.3019829829934781,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 18711
+    },
+    {
+      "epoch": 0.18712,
+      "grad_norm": 1.1132955579832229,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 18712
+    },
+    {
+      "epoch": 0.18713,
+      "grad_norm": 1.2236759716132808,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 18713
+    },
+    {
+      "epoch": 0.18714,
+      "grad_norm": 1.270980229472738,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 18714
+    },
+    {
+      "epoch": 0.18715,
+      "grad_norm": 1.322438126171909,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 18715
+    },
+    {
+      "epoch": 0.18716,
+      "grad_norm": 1.0512819280225125,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 18716
+    },
+    {
+      "epoch": 0.18717,
+      "grad_norm": 1.458252119498716,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 18717
+    },
+    {
+      "epoch": 0.18718,
+      "grad_norm": 1.0607680755776179,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 18718
+    },
+    {
+      "epoch": 0.18719,
+      "grad_norm": 1.3862670015696035,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 18719
+    },
+    {
+      "epoch": 0.1872,
+      "grad_norm": 1.2237777419380014,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 18720
+    },
+    {
+      "epoch": 0.18721,
+      "grad_norm": 1.376918904288084,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 18721
+    },
+    {
+      "epoch": 0.18722,
+      "grad_norm": 1.170486727504894,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 18722
+    },
+    {
+      "epoch": 0.18723,
+      "grad_norm": 1.3146797562996735,
+      "learning_rate": 0.003,
+      "loss": 4.0368,
+      "step": 18723
+    },
+    {
+      "epoch": 0.18724,
+      "grad_norm": 1.3750013180924125,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 18724
+    },
+    {
+      "epoch": 0.18725,
+      "grad_norm": 1.1278171591869586,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 18725
+    },
+    {
+      "epoch": 0.18726,
+      "grad_norm": 1.3376324892975,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 18726
+    },
+    {
+      "epoch": 0.18727,
+      "grad_norm": 1.057740063162026,
+      "learning_rate": 0.003,
+      "loss": 3.9824,
+      "step": 18727
+    },
+    {
+      "epoch": 0.18728,
+      "grad_norm": 1.3043756435412168,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 18728
+    },
+    {
+      "epoch": 0.18729,
+      "grad_norm": 1.0378976127669879,
+      "learning_rate": 0.003,
+      "loss": 4.0452,
+      "step": 18729
+    },
+    {
+      "epoch": 0.1873,
+      "grad_norm": 1.7193087087203027,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 18730
+    },
+    {
+      "epoch": 0.18731,
+      "grad_norm": 1.116419553335292,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 18731
+    },
+    {
+      "epoch": 0.18732,
+      "grad_norm": 1.3772300591436983,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 18732
+    },
+    {
+      "epoch": 0.18733,
+      "grad_norm": 1.4544935001798796,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 18733
+    },
+    {
+      "epoch": 0.18734,
+      "grad_norm": 1.3162098252868233,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 18734
+    },
+    {
+      "epoch": 0.18735,
+      "grad_norm": 1.1633225815507455,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 18735
+    },
+    {
+      "epoch": 0.18736,
+      "grad_norm": 1.5858181808219405,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 18736
+    },
+    {
+      "epoch": 0.18737,
+      "grad_norm": 1.149873887174443,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 18737
+    },
+    {
+      "epoch": 0.18738,
+      "grad_norm": 1.358716025247225,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 18738
+    },
+    {
+      "epoch": 0.18739,
+      "grad_norm": 1.091467221483659,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 18739
+    },
+    {
+      "epoch": 0.1874,
+      "grad_norm": 1.4703193300528377,
+      "learning_rate": 0.003,
+      "loss": 3.9766,
+      "step": 18740
+    },
+    {
+      "epoch": 0.18741,
+      "grad_norm": 1.098064255985667,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 18741
+    },
+    {
+      "epoch": 0.18742,
+      "grad_norm": 1.409983007857012,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 18742
+    },
+    {
+      "epoch": 0.18743,
+      "grad_norm": 1.2106177297652705,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 18743
+    },
+    {
+      "epoch": 0.18744,
+      "grad_norm": 1.2815108498530823,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 18744
+    },
+    {
+      "epoch": 0.18745,
+      "grad_norm": 1.1932920186130076,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 18745
+    },
+    {
+      "epoch": 0.18746,
+      "grad_norm": 1.2752691820517976,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 18746
+    },
+    {
+      "epoch": 0.18747,
+      "grad_norm": 1.417029736211795,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 18747
+    },
+    {
+      "epoch": 0.18748,
+      "grad_norm": 1.1874280280107192,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 18748
+    },
+    {
+      "epoch": 0.18749,
+      "grad_norm": 1.4466801190568057,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 18749
+    },
+    {
+      "epoch": 0.1875,
+      "grad_norm": 1.128243469158432,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 18750
+    },
+    {
+      "epoch": 0.18751,
+      "grad_norm": 1.3755903593701546,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 18751
+    },
+    {
+      "epoch": 0.18752,
+      "grad_norm": 1.286503093271871,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 18752
+    },
+    {
+      "epoch": 0.18753,
+      "grad_norm": 1.2690163012949174,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 18753
+    },
+    {
+      "epoch": 0.18754,
+      "grad_norm": 1.3603536497722082,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 18754
+    },
+    {
+      "epoch": 0.18755,
+      "grad_norm": 1.4681285132760695,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 18755
+    },
+    {
+      "epoch": 0.18756,
+      "grad_norm": 1.1572490629926064,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 18756
+    },
+    {
+      "epoch": 0.18757,
+      "grad_norm": 1.2509963895410787,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 18757
+    },
+    {
+      "epoch": 0.18758,
+      "grad_norm": 1.0698688906875218,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 18758
+    },
+    {
+      "epoch": 0.18759,
+      "grad_norm": 1.2782355106290009,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 18759
+    },
+    {
+      "epoch": 0.1876,
+      "grad_norm": 1.0630946077760033,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 18760
+    },
+    {
+      "epoch": 0.18761,
+      "grad_norm": 1.5442058962361622,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 18761
+    },
+    {
+      "epoch": 0.18762,
+      "grad_norm": 1.1588513781519307,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 18762
+    },
+    {
+      "epoch": 0.18763,
+      "grad_norm": 1.39154666361133,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 18763
+    },
+    {
+      "epoch": 0.18764,
+      "grad_norm": 1.2438414106731435,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 18764
+    },
+    {
+      "epoch": 0.18765,
+      "grad_norm": 1.233326266286673,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 18765
+    },
+    {
+      "epoch": 0.18766,
+      "grad_norm": 1.5246131084850232,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 18766
+    },
+    {
+      "epoch": 0.18767,
+      "grad_norm": 1.1164940103492973,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 18767
+    },
+    {
+      "epoch": 0.18768,
+      "grad_norm": 1.2732871741442202,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 18768
+    },
+    {
+      "epoch": 0.18769,
+      "grad_norm": 1.3576031831596136,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 18769
+    },
+    {
+      "epoch": 0.1877,
+      "grad_norm": 1.4855913600576,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 18770
+    },
+    {
+      "epoch": 0.18771,
+      "grad_norm": 1.0985197522285455,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 18771
+    },
+    {
+      "epoch": 0.18772,
+      "grad_norm": 1.292770457214615,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 18772
+    },
+    {
+      "epoch": 0.18773,
+      "grad_norm": 1.1431768628823065,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 18773
+    },
+    {
+      "epoch": 0.18774,
+      "grad_norm": 1.3401269635220132,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 18774
+    },
+    {
+      "epoch": 0.18775,
+      "grad_norm": 1.4440907767201874,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 18775
+    },
+    {
+      "epoch": 0.18776,
+      "grad_norm": 1.4016436996853219,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 18776
+    },
+    {
+      "epoch": 0.18777,
+      "grad_norm": 1.1107532056560023,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 18777
+    },
+    {
+      "epoch": 0.18778,
+      "grad_norm": 1.4163198512437702,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 18778
+    },
+    {
+      "epoch": 0.18779,
+      "grad_norm": 1.1721215333974473,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 18779
+    },
+    {
+      "epoch": 0.1878,
+      "grad_norm": 1.278257339205773,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 18780
+    },
+    {
+      "epoch": 0.18781,
+      "grad_norm": 1.3053228746542866,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 18781
+    },
+    {
+      "epoch": 0.18782,
+      "grad_norm": 1.1375049537145128,
+      "learning_rate": 0.003,
+      "loss": 3.9648,
+      "step": 18782
+    },
+    {
+      "epoch": 0.18783,
+      "grad_norm": 1.2513112953606718,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 18783
+    },
+    {
+      "epoch": 0.18784,
+      "grad_norm": 1.1693247682321866,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 18784
+    },
+    {
+      "epoch": 0.18785,
+      "grad_norm": 1.3205673873811723,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 18785
+    },
+    {
+      "epoch": 0.18786,
+      "grad_norm": 1.1292968914157666,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 18786
+    },
+    {
+      "epoch": 0.18787,
+      "grad_norm": 1.4473043584109355,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 18787
+    },
+    {
+      "epoch": 0.18788,
+      "grad_norm": 1.0521059265770933,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 18788
+    },
+    {
+      "epoch": 0.18789,
+      "grad_norm": 1.4731019996158254,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 18789
+    },
+    {
+      "epoch": 0.1879,
+      "grad_norm": 1.1114846782522791,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 18790
+    },
+    {
+      "epoch": 0.18791,
+      "grad_norm": 1.3651276065078815,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 18791
+    },
+    {
+      "epoch": 0.18792,
+      "grad_norm": 1.093525883067943,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 18792
+    },
+    {
+      "epoch": 0.18793,
+      "grad_norm": 1.2688393406196965,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 18793
+    },
+    {
+      "epoch": 0.18794,
+      "grad_norm": 1.3340696193587918,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 18794
+    },
+    {
+      "epoch": 0.18795,
+      "grad_norm": 1.3630128950756113,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 18795
+    },
+    {
+      "epoch": 0.18796,
+      "grad_norm": 1.212412852205088,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 18796
+    },
+    {
+      "epoch": 0.18797,
+      "grad_norm": 1.4512811090790925,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 18797
+    },
+    {
+      "epoch": 0.18798,
+      "grad_norm": 1.0094139765105765,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 18798
+    },
+    {
+      "epoch": 0.18799,
+      "grad_norm": 1.6586776547444237,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 18799
+    },
+    {
+      "epoch": 0.188,
+      "grad_norm": 1.0297074310474934,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 18800
+    },
+    {
+      "epoch": 0.18801,
+      "grad_norm": 1.734932585171164,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 18801
+    },
+    {
+      "epoch": 0.18802,
+      "grad_norm": 1.0348704782328961,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 18802
+    },
+    {
+      "epoch": 0.18803,
+      "grad_norm": 1.1504415390030405,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 18803
+    },
+    {
+      "epoch": 0.18804,
+      "grad_norm": 1.389416151087945,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 18804
+    },
+    {
+      "epoch": 0.18805,
+      "grad_norm": 1.3290382566790426,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 18805
+    },
+    {
+      "epoch": 0.18806,
+      "grad_norm": 1.3859814992056898,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 18806
+    },
+    {
+      "epoch": 0.18807,
+      "grad_norm": 1.3702533713002063,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 18807
+    },
+    {
+      "epoch": 0.18808,
+      "grad_norm": 1.2715168233177794,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 18808
+    },
+    {
+      "epoch": 0.18809,
+      "grad_norm": 1.2704671484468457,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 18809
+    },
+    {
+      "epoch": 0.1881,
+      "grad_norm": 1.3509670439146595,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 18810
+    },
+    {
+      "epoch": 0.18811,
+      "grad_norm": 1.2373546477645962,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 18811
+    },
+    {
+      "epoch": 0.18812,
+      "grad_norm": 1.4410704719575569,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 18812
+    },
+    {
+      "epoch": 0.18813,
+      "grad_norm": 1.229364311713738,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 18813
+    },
+    {
+      "epoch": 0.18814,
+      "grad_norm": 1.4114113896071405,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 18814
+    },
+    {
+      "epoch": 0.18815,
+      "grad_norm": 1.3822494345951508,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 18815
+    },
+    {
+      "epoch": 0.18816,
+      "grad_norm": 1.3748248890074133,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 18816
+    },
+    {
+      "epoch": 0.18817,
+      "grad_norm": 1.1973746701862775,
+      "learning_rate": 0.003,
+      "loss": 4.047,
+      "step": 18817
+    },
+    {
+      "epoch": 0.18818,
+      "grad_norm": 1.362257965096609,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 18818
+    },
+    {
+      "epoch": 0.18819,
+      "grad_norm": 1.0156086218524036,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 18819
+    },
+    {
+      "epoch": 0.1882,
+      "grad_norm": 1.2566168851036394,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 18820
+    },
+    {
+      "epoch": 0.18821,
+      "grad_norm": 1.5324163598981786,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 18821
+    },
+    {
+      "epoch": 0.18822,
+      "grad_norm": 1.2512580689858643,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 18822
+    },
+    {
+      "epoch": 0.18823,
+      "grad_norm": 1.2935741105961847,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 18823
+    },
+    {
+      "epoch": 0.18824,
+      "grad_norm": 1.1644334936516052,
+      "learning_rate": 0.003,
+      "loss": 3.9835,
+      "step": 18824
+    },
+    {
+      "epoch": 0.18825,
+      "grad_norm": 1.3761851337505675,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 18825
+    },
+    {
+      "epoch": 0.18826,
+      "grad_norm": 1.0995130171261787,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 18826
+    },
+    {
+      "epoch": 0.18827,
+      "grad_norm": 1.443020168232223,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 18827
+    },
+    {
+      "epoch": 0.18828,
+      "grad_norm": 1.1282469620944757,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 18828
+    },
+    {
+      "epoch": 0.18829,
+      "grad_norm": 1.323305598272999,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 18829
+    },
+    {
+      "epoch": 0.1883,
+      "grad_norm": 1.1828572268105,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 18830
+    },
+    {
+      "epoch": 0.18831,
+      "grad_norm": 1.4276740531309458,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 18831
+    },
+    {
+      "epoch": 0.18832,
+      "grad_norm": 1.184544184111074,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 18832
+    },
+    {
+      "epoch": 0.18833,
+      "grad_norm": 1.4822322926422464,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 18833
+    },
+    {
+      "epoch": 0.18834,
+      "grad_norm": 1.3143441193176422,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 18834
+    },
+    {
+      "epoch": 0.18835,
+      "grad_norm": 1.254993659017894,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 18835
+    },
+    {
+      "epoch": 0.18836,
+      "grad_norm": 1.1184698641428459,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 18836
+    },
+    {
+      "epoch": 0.18837,
+      "grad_norm": 1.2404210698345246,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 18837
+    },
+    {
+      "epoch": 0.18838,
+      "grad_norm": 1.1070446605113913,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 18838
+    },
+    {
+      "epoch": 0.18839,
+      "grad_norm": 1.4704983128492852,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 18839
+    },
+    {
+      "epoch": 0.1884,
+      "grad_norm": 1.3008673418840326,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 18840
+    },
+    {
+      "epoch": 0.18841,
+      "grad_norm": 1.2595207232349506,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 18841
+    },
+    {
+      "epoch": 0.18842,
+      "grad_norm": 1.342808067808002,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 18842
+    },
+    {
+      "epoch": 0.18843,
+      "grad_norm": 0.9922458732256416,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 18843
+    },
+    {
+      "epoch": 0.18844,
+      "grad_norm": 1.445638244499319,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 18844
+    },
+    {
+      "epoch": 0.18845,
+      "grad_norm": 0.9789811370228675,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 18845
+    },
+    {
+      "epoch": 0.18846,
+      "grad_norm": 1.5034543902524964,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 18846
+    },
+    {
+      "epoch": 0.18847,
+      "grad_norm": 0.927876524239201,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 18847
+    },
+    {
+      "epoch": 0.18848,
+      "grad_norm": 1.3296711416190021,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 18848
+    },
+    {
+      "epoch": 0.18849,
+      "grad_norm": 1.4641733211136532,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 18849
+    },
+    {
+      "epoch": 0.1885,
+      "grad_norm": 1.2706728987678715,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 18850
+    },
+    {
+      "epoch": 0.18851,
+      "grad_norm": 1.37942646887191,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 18851
+    },
+    {
+      "epoch": 0.18852,
+      "grad_norm": 1.4416990193136976,
+      "learning_rate": 0.003,
+      "loss": 4.0472,
+      "step": 18852
+    },
+    {
+      "epoch": 0.18853,
+      "grad_norm": 1.2494013061938976,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 18853
+    },
+    {
+      "epoch": 0.18854,
+      "grad_norm": 1.4870346591138417,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 18854
+    },
+    {
+      "epoch": 0.18855,
+      "grad_norm": 1.2022339227850636,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 18855
+    },
+    {
+      "epoch": 0.18856,
+      "grad_norm": 1.3724267696002113,
+      "learning_rate": 0.003,
+      "loss": 3.9824,
+      "step": 18856
+    },
+    {
+      "epoch": 0.18857,
+      "grad_norm": 1.1434147235476109,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 18857
+    },
+    {
+      "epoch": 0.18858,
+      "grad_norm": 1.3253368873665614,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 18858
+    },
+    {
+      "epoch": 0.18859,
+      "grad_norm": 1.3516363221601224,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 18859
+    },
+    {
+      "epoch": 0.1886,
+      "grad_norm": 1.21235742171777,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 18860
+    },
+    {
+      "epoch": 0.18861,
+      "grad_norm": 1.3597598477739947,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 18861
+    },
+    {
+      "epoch": 0.18862,
+      "grad_norm": 1.2549641906031592,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 18862
+    },
+    {
+      "epoch": 0.18863,
+      "grad_norm": 1.429341267930262,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 18863
+    },
+    {
+      "epoch": 0.18864,
+      "grad_norm": 1.0721288573102947,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 18864
+    },
+    {
+      "epoch": 0.18865,
+      "grad_norm": 1.4264240127264443,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 18865
+    },
+    {
+      "epoch": 0.18866,
+      "grad_norm": 1.1983018274161035,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 18866
+    },
+    {
+      "epoch": 0.18867,
+      "grad_norm": 1.350228467376567,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 18867
+    },
+    {
+      "epoch": 0.18868,
+      "grad_norm": 1.2367787437442568,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 18868
+    },
+    {
+      "epoch": 0.18869,
+      "grad_norm": 1.4888523853636724,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 18869
+    },
+    {
+      "epoch": 0.1887,
+      "grad_norm": 1.1358909114521822,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 18870
+    },
+    {
+      "epoch": 0.18871,
+      "grad_norm": 1.2907629612968026,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 18871
+    },
+    {
+      "epoch": 0.18872,
+      "grad_norm": 1.1005953395431796,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 18872
+    },
+    {
+      "epoch": 0.18873,
+      "grad_norm": 1.6345853186178532,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 18873
+    },
+    {
+      "epoch": 0.18874,
+      "grad_norm": 1.1746353686890867,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 18874
+    },
+    {
+      "epoch": 0.18875,
+      "grad_norm": 1.2573364312123834,
+      "learning_rate": 0.003,
+      "loss": 4.0494,
+      "step": 18875
+    },
+    {
+      "epoch": 0.18876,
+      "grad_norm": 1.364399823656439,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 18876
+    },
+    {
+      "epoch": 0.18877,
+      "grad_norm": 1.211411391260069,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 18877
+    },
+    {
+      "epoch": 0.18878,
+      "grad_norm": 1.3657723068251055,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 18878
+    },
+    {
+      "epoch": 0.18879,
+      "grad_norm": 1.2149671592494593,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 18879
+    },
+    {
+      "epoch": 0.1888,
+      "grad_norm": 1.3051352574687405,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 18880
+    },
+    {
+      "epoch": 0.18881,
+      "grad_norm": 1.1329075107385167,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 18881
+    },
+    {
+      "epoch": 0.18882,
+      "grad_norm": 1.304321722250987,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 18882
+    },
+    {
+      "epoch": 0.18883,
+      "grad_norm": 1.2529829747626335,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 18883
+    },
+    {
+      "epoch": 0.18884,
+      "grad_norm": 1.201355611042984,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 18884
+    },
+    {
+      "epoch": 0.18885,
+      "grad_norm": 1.3400792509794779,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 18885
+    },
+    {
+      "epoch": 0.18886,
+      "grad_norm": 1.3073238246591101,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 18886
+    },
+    {
+      "epoch": 0.18887,
+      "grad_norm": 1.1548865607783483,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 18887
+    },
+    {
+      "epoch": 0.18888,
+      "grad_norm": 1.2996683356200782,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 18888
+    },
+    {
+      "epoch": 0.18889,
+      "grad_norm": 1.2910113853304779,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 18889
+    },
+    {
+      "epoch": 0.1889,
+      "grad_norm": 1.392740912920975,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 18890
+    },
+    {
+      "epoch": 0.18891,
+      "grad_norm": 1.1319046905564893,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 18891
+    },
+    {
+      "epoch": 0.18892,
+      "grad_norm": 1.297455272784623,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 18892
+    },
+    {
+      "epoch": 0.18893,
+      "grad_norm": 1.1822205544925295,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 18893
+    },
+    {
+      "epoch": 0.18894,
+      "grad_norm": 1.3799401432733638,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 18894
+    },
+    {
+      "epoch": 0.18895,
+      "grad_norm": 1.3113136082152823,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 18895
+    },
+    {
+      "epoch": 0.18896,
+      "grad_norm": 1.114155835934784,
+      "learning_rate": 0.003,
+      "loss": 3.9893,
+      "step": 18896
+    },
+    {
+      "epoch": 0.18897,
+      "grad_norm": 1.3355867118053277,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 18897
+    },
+    {
+      "epoch": 0.18898,
+      "grad_norm": 1.2254191046973921,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 18898
+    },
+    {
+      "epoch": 0.18899,
+      "grad_norm": 1.5779703946160442,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 18899
+    },
+    {
+      "epoch": 0.189,
+      "grad_norm": 1.2173062699404291,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 18900
+    },
+    {
+      "epoch": 0.18901,
+      "grad_norm": 1.2936130368839325,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 18901
+    },
+    {
+      "epoch": 0.18902,
+      "grad_norm": 1.2887485223488224,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 18902
+    },
+    {
+      "epoch": 0.18903,
+      "grad_norm": 1.2893185921850154,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 18903
+    },
+    {
+      "epoch": 0.18904,
+      "grad_norm": 1.3204077224346007,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 18904
+    },
+    {
+      "epoch": 0.18905,
+      "grad_norm": 1.376599778433685,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 18905
+    },
+    {
+      "epoch": 0.18906,
+      "grad_norm": 1.1842652767120574,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 18906
+    },
+    {
+      "epoch": 0.18907,
+      "grad_norm": 1.8752157539520744,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 18907
+    },
+    {
+      "epoch": 0.18908,
+      "grad_norm": 1.0844141954120532,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 18908
+    },
+    {
+      "epoch": 0.18909,
+      "grad_norm": 1.4494358850508267,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 18909
+    },
+    {
+      "epoch": 0.1891,
+      "grad_norm": 1.3815892662527047,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 18910
+    },
+    {
+      "epoch": 0.18911,
+      "grad_norm": 1.22776110818462,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 18911
+    },
+    {
+      "epoch": 0.18912,
+      "grad_norm": 1.2546354098864025,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 18912
+    },
+    {
+      "epoch": 0.18913,
+      "grad_norm": 1.4322587831332994,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 18913
+    },
+    {
+      "epoch": 0.18914,
+      "grad_norm": 1.4357672228376845,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 18914
+    },
+    {
+      "epoch": 0.18915,
+      "grad_norm": 1.381660269032073,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 18915
+    },
+    {
+      "epoch": 0.18916,
+      "grad_norm": 1.2120151654235733,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 18916
+    },
+    {
+      "epoch": 0.18917,
+      "grad_norm": 1.155511029221399,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 18917
+    },
+    {
+      "epoch": 0.18918,
+      "grad_norm": 1.3798380706777866,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 18918
+    },
+    {
+      "epoch": 0.18919,
+      "grad_norm": 1.2430163913873487,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 18919
+    },
+    {
+      "epoch": 0.1892,
+      "grad_norm": 1.2632121393382478,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 18920
+    },
+    {
+      "epoch": 0.18921,
+      "grad_norm": 1.2240506042274173,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 18921
+    },
+    {
+      "epoch": 0.18922,
+      "grad_norm": 1.5078155308240528,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 18922
+    },
+    {
+      "epoch": 0.18923,
+      "grad_norm": 1.0321468505232072,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 18923
+    },
+    {
+      "epoch": 0.18924,
+      "grad_norm": 1.3759820406243037,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 18924
+    },
+    {
+      "epoch": 0.18925,
+      "grad_norm": 1.1578600968645194,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 18925
+    },
+    {
+      "epoch": 0.18926,
+      "grad_norm": 1.4386004900604346,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 18926
+    },
+    {
+      "epoch": 0.18927,
+      "grad_norm": 1.1975391679472591,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 18927
+    },
+    {
+      "epoch": 0.18928,
+      "grad_norm": 1.4233703755004665,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 18928
+    },
+    {
+      "epoch": 0.18929,
+      "grad_norm": 1.3298361094853883,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 18929
+    },
+    {
+      "epoch": 0.1893,
+      "grad_norm": 1.1668141803513414,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 18930
+    },
+    {
+      "epoch": 0.18931,
+      "grad_norm": 1.1927574278879163,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 18931
+    },
+    {
+      "epoch": 0.18932,
+      "grad_norm": 1.2278395725435298,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 18932
+    },
+    {
+      "epoch": 0.18933,
+      "grad_norm": 1.2719217916469485,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 18933
+    },
+    {
+      "epoch": 0.18934,
+      "grad_norm": 1.3688014376645494,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 18934
+    },
+    {
+      "epoch": 0.18935,
+      "grad_norm": 1.2959450429209367,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 18935
+    },
+    {
+      "epoch": 0.18936,
+      "grad_norm": 1.1930725451251745,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 18936
+    },
+    {
+      "epoch": 0.18937,
+      "grad_norm": 1.2407030124021434,
+      "learning_rate": 0.003,
+      "loss": 3.9877,
+      "step": 18937
+    },
+    {
+      "epoch": 0.18938,
+      "grad_norm": 1.3330548919923813,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 18938
+    },
+    {
+      "epoch": 0.18939,
+      "grad_norm": 1.3082167721320919,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 18939
+    },
+    {
+      "epoch": 0.1894,
+      "grad_norm": 1.4748190566899932,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 18940
+    },
+    {
+      "epoch": 0.18941,
+      "grad_norm": 1.116748397977893,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 18941
+    },
+    {
+      "epoch": 0.18942,
+      "grad_norm": 1.4291352947153417,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 18942
+    },
+    {
+      "epoch": 0.18943,
+      "grad_norm": 1.1199615328961368,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 18943
+    },
+    {
+      "epoch": 0.18944,
+      "grad_norm": 1.5294642175166582,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 18944
+    },
+    {
+      "epoch": 0.18945,
+      "grad_norm": 1.0882092069192857,
+      "learning_rate": 0.003,
+      "loss": 3.9801,
+      "step": 18945
+    },
+    {
+      "epoch": 0.18946,
+      "grad_norm": 1.6066215519903786,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 18946
+    },
+    {
+      "epoch": 0.18947,
+      "grad_norm": 1.2083091516202875,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 18947
+    },
+    {
+      "epoch": 0.18948,
+      "grad_norm": 1.1985481857292923,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 18948
+    },
+    {
+      "epoch": 0.18949,
+      "grad_norm": 1.2775459767805657,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 18949
+    },
+    {
+      "epoch": 0.1895,
+      "grad_norm": 1.298008618901343,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 18950
+    },
+    {
+      "epoch": 0.18951,
+      "grad_norm": 1.3213237601028365,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 18951
+    },
+    {
+      "epoch": 0.18952,
+      "grad_norm": 1.2355302765967384,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 18952
+    },
+    {
+      "epoch": 0.18953,
+      "grad_norm": 1.298428704219246,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 18953
+    },
+    {
+      "epoch": 0.18954,
+      "grad_norm": 1.2595444817645047,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 18954
+    },
+    {
+      "epoch": 0.18955,
+      "grad_norm": 1.4295919433866582,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 18955
+    },
+    {
+      "epoch": 0.18956,
+      "grad_norm": 1.050915980475837,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 18956
+    },
+    {
+      "epoch": 0.18957,
+      "grad_norm": 1.3569253053260681,
+      "learning_rate": 0.003,
+      "loss": 3.9877,
+      "step": 18957
+    },
+    {
+      "epoch": 0.18958,
+      "grad_norm": 1.323218007787156,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 18958
+    },
+    {
+      "epoch": 0.18959,
+      "grad_norm": 1.1827405133434696,
+      "learning_rate": 0.003,
+      "loss": 3.9766,
+      "step": 18959
+    },
+    {
+      "epoch": 0.1896,
+      "grad_norm": 1.249099258846444,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 18960
+    },
+    {
+      "epoch": 0.18961,
+      "grad_norm": 1.199208344986785,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 18961
+    },
+    {
+      "epoch": 0.18962,
+      "grad_norm": 1.2709152360429374,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 18962
+    },
+    {
+      "epoch": 0.18963,
+      "grad_norm": 1.1441255867778828,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 18963
+    },
+    {
+      "epoch": 0.18964,
+      "grad_norm": 1.2224256617719833,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 18964
+    },
+    {
+      "epoch": 0.18965,
+      "grad_norm": 1.2316616781902274,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 18965
+    },
+    {
+      "epoch": 0.18966,
+      "grad_norm": 1.3247909543506073,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 18966
+    },
+    {
+      "epoch": 0.18967,
+      "grad_norm": 1.0669828300995452,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 18967
+    },
+    {
+      "epoch": 0.18968,
+      "grad_norm": 1.6222343013067921,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 18968
+    },
+    {
+      "epoch": 0.18969,
+      "grad_norm": 1.179308640345885,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 18969
+    },
+    {
+      "epoch": 0.1897,
+      "grad_norm": 1.3939614509485425,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 18970
+    },
+    {
+      "epoch": 0.18971,
+      "grad_norm": 1.0954415561570288,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 18971
+    },
+    {
+      "epoch": 0.18972,
+      "grad_norm": 1.3422469479213295,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 18972
+    },
+    {
+      "epoch": 0.18973,
+      "grad_norm": 1.1209971142146853,
+      "learning_rate": 0.003,
+      "loss": 3.9845,
+      "step": 18973
+    },
+    {
+      "epoch": 0.18974,
+      "grad_norm": 1.216262616098274,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 18974
+    },
+    {
+      "epoch": 0.18975,
+      "grad_norm": 1.2521601171024372,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 18975
+    },
+    {
+      "epoch": 0.18976,
+      "grad_norm": 1.0936838393827564,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 18976
+    },
+    {
+      "epoch": 0.18977,
+      "grad_norm": 1.4222352337005453,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 18977
+    },
+    {
+      "epoch": 0.18978,
+      "grad_norm": 1.1516237804968388,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 18978
+    },
+    {
+      "epoch": 0.18979,
+      "grad_norm": 1.4556857373688754,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 18979
+    },
+    {
+      "epoch": 0.1898,
+      "grad_norm": 1.0075365323494918,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 18980
+    },
+    {
+      "epoch": 0.18981,
+      "grad_norm": 1.4602101366952436,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 18981
+    },
+    {
+      "epoch": 0.18982,
+      "grad_norm": 1.0574097890207579,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 18982
+    },
+    {
+      "epoch": 0.18983,
+      "grad_norm": 1.7161545080248717,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 18983
+    },
+    {
+      "epoch": 0.18984,
+      "grad_norm": 0.9621329220924494,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 18984
+    },
+    {
+      "epoch": 0.18985,
+      "grad_norm": 1.5805646455801179,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 18985
+    },
+    {
+      "epoch": 0.18986,
+      "grad_norm": 1.2399912292567599,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 18986
+    },
+    {
+      "epoch": 0.18987,
+      "grad_norm": 1.3783649312622117,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 18987
+    },
+    {
+      "epoch": 0.18988,
+      "grad_norm": 1.498681352981351,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 18988
+    },
+    {
+      "epoch": 0.18989,
+      "grad_norm": 1.3807216236031543,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 18989
+    },
+    {
+      "epoch": 0.1899,
+      "grad_norm": 1.375538414014236,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 18990
+    },
+    {
+      "epoch": 0.18991,
+      "grad_norm": 1.2420049603098005,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 18991
+    },
+    {
+      "epoch": 0.18992,
+      "grad_norm": 1.2223551716105183,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 18992
+    },
+    {
+      "epoch": 0.18993,
+      "grad_norm": 1.2570012470085465,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 18993
+    },
+    {
+      "epoch": 0.18994,
+      "grad_norm": 1.4628515256064998,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 18994
+    },
+    {
+      "epoch": 0.18995,
+      "grad_norm": 0.9789874467067208,
+      "learning_rate": 0.003,
+      "loss": 3.9585,
+      "step": 18995
+    },
+    {
+      "epoch": 0.18996,
+      "grad_norm": 1.1670502812183778,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 18996
+    },
+    {
+      "epoch": 0.18997,
+      "grad_norm": 1.3794493322023509,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 18997
+    },
+    {
+      "epoch": 0.18998,
+      "grad_norm": 1.2742959001740786,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 18998
+    },
+    {
+      "epoch": 0.18999,
+      "grad_norm": 1.149930266244774,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 18999
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 1.3926689602735243,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 19000
+    },
+    {
+      "epoch": 0.19001,
+      "grad_norm": 1.3281844354153476,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 19001
+    },
+    {
+      "epoch": 0.19002,
+      "grad_norm": 1.3399837271544746,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 19002
+    },
+    {
+      "epoch": 0.19003,
+      "grad_norm": 1.151917376655648,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 19003
+    },
+    {
+      "epoch": 0.19004,
+      "grad_norm": 1.402780168163471,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 19004
+    },
+    {
+      "epoch": 0.19005,
+      "grad_norm": 1.1601986147353196,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 19005
+    },
+    {
+      "epoch": 0.19006,
+      "grad_norm": 1.4341768719680854,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 19006
+    },
+    {
+      "epoch": 0.19007,
+      "grad_norm": 1.2738095860949428,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 19007
+    },
+    {
+      "epoch": 0.19008,
+      "grad_norm": 1.4703448321356738,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 19008
+    },
+    {
+      "epoch": 0.19009,
+      "grad_norm": 1.2759772286311846,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 19009
+    },
+    {
+      "epoch": 0.1901,
+      "grad_norm": 1.4164090562929703,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 19010
+    },
+    {
+      "epoch": 0.19011,
+      "grad_norm": 1.2514843916867464,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 19011
+    },
+    {
+      "epoch": 0.19012,
+      "grad_norm": 1.1829876887961355,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 19012
+    },
+    {
+      "epoch": 0.19013,
+      "grad_norm": 1.365541613847154,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 19013
+    },
+    {
+      "epoch": 0.19014,
+      "grad_norm": 1.3682420465503087,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 19014
+    },
+    {
+      "epoch": 0.19015,
+      "grad_norm": 1.2157757363541761,
+      "learning_rate": 0.003,
+      "loss": 3.9725,
+      "step": 19015
+    },
+    {
+      "epoch": 0.19016,
+      "grad_norm": 1.30835776770585,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 19016
+    },
+    {
+      "epoch": 0.19017,
+      "grad_norm": 1.2049003231229503,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 19017
+    },
+    {
+      "epoch": 0.19018,
+      "grad_norm": 1.1741936561386186,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 19018
+    },
+    {
+      "epoch": 0.19019,
+      "grad_norm": 1.0853739123496602,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 19019
+    },
+    {
+      "epoch": 0.1902,
+      "grad_norm": 1.2708312439016338,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 19020
+    },
+    {
+      "epoch": 0.19021,
+      "grad_norm": 1.278559727595023,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 19021
+    },
+    {
+      "epoch": 0.19022,
+      "grad_norm": 1.4935703188671352,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 19022
+    },
+    {
+      "epoch": 0.19023,
+      "grad_norm": 1.3448299722365684,
+      "learning_rate": 0.003,
+      "loss": 4.0584,
+      "step": 19023
+    },
+    {
+      "epoch": 0.19024,
+      "grad_norm": 0.9982529244950282,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 19024
+    },
+    {
+      "epoch": 0.19025,
+      "grad_norm": 1.1850540718595843,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 19025
+    },
+    {
+      "epoch": 0.19026,
+      "grad_norm": 1.275321475108677,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 19026
+    },
+    {
+      "epoch": 0.19027,
+      "grad_norm": 1.3034277428376424,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 19027
+    },
+    {
+      "epoch": 0.19028,
+      "grad_norm": 1.3678353786160546,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 19028
+    },
+    {
+      "epoch": 0.19029,
+      "grad_norm": 1.206949999170969,
+      "learning_rate": 0.003,
+      "loss": 3.9821,
+      "step": 19029
+    },
+    {
+      "epoch": 0.1903,
+      "grad_norm": 1.3360109073763418,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 19030
+    },
+    {
+      "epoch": 0.19031,
+      "grad_norm": 1.1264515540885045,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 19031
+    },
+    {
+      "epoch": 0.19032,
+      "grad_norm": 1.5281619018753096,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 19032
+    },
+    {
+      "epoch": 0.19033,
+      "grad_norm": 1.3687621256897895,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 19033
+    },
+    {
+      "epoch": 0.19034,
+      "grad_norm": 1.373568472215279,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 19034
+    },
+    {
+      "epoch": 0.19035,
+      "grad_norm": 1.4109685472418252,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 19035
+    },
+    {
+      "epoch": 0.19036,
+      "grad_norm": 1.3856913191782105,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 19036
+    },
+    {
+      "epoch": 0.19037,
+      "grad_norm": 1.3844407098794684,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 19037
+    },
+    {
+      "epoch": 0.19038,
+      "grad_norm": 0.9370740363246308,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 19038
+    },
+    {
+      "epoch": 0.19039,
+      "grad_norm": 1.1793836661650465,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 19039
+    },
+    {
+      "epoch": 0.1904,
+      "grad_norm": 1.1608256830710861,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 19040
+    },
+    {
+      "epoch": 0.19041,
+      "grad_norm": 1.4188609592348138,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 19041
+    },
+    {
+      "epoch": 0.19042,
+      "grad_norm": 1.1914508725401274,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 19042
+    },
+    {
+      "epoch": 0.19043,
+      "grad_norm": 1.4740618183719758,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 19043
+    },
+    {
+      "epoch": 0.19044,
+      "grad_norm": 1.0489196721707512,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 19044
+    },
+    {
+      "epoch": 0.19045,
+      "grad_norm": 1.3481056078611993,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 19045
+    },
+    {
+      "epoch": 0.19046,
+      "grad_norm": 1.261395410431745,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 19046
+    },
+    {
+      "epoch": 0.19047,
+      "grad_norm": 1.2461410950690741,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 19047
+    },
+    {
+      "epoch": 0.19048,
+      "grad_norm": 1.4142663861684794,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 19048
+    },
+    {
+      "epoch": 0.19049,
+      "grad_norm": 1.4230389583856595,
+      "learning_rate": 0.003,
+      "loss": 4.0307,
+      "step": 19049
+    },
+    {
+      "epoch": 0.1905,
+      "grad_norm": 1.5337012326259634,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 19050
+    },
+    {
+      "epoch": 0.19051,
+      "grad_norm": 0.9795866202132011,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 19051
+    },
+    {
+      "epoch": 0.19052,
+      "grad_norm": 1.4448665791022492,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 19052
+    },
+    {
+      "epoch": 0.19053,
+      "grad_norm": 0.9690995895127987,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 19053
+    },
+    {
+      "epoch": 0.19054,
+      "grad_norm": 1.5231033345167566,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 19054
+    },
+    {
+      "epoch": 0.19055,
+      "grad_norm": 1.063381089439959,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 19055
+    },
+    {
+      "epoch": 0.19056,
+      "grad_norm": 1.4801953783140573,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 19056
+    },
+    {
+      "epoch": 0.19057,
+      "grad_norm": 1.3721181616330667,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 19057
+    },
+    {
+      "epoch": 0.19058,
+      "grad_norm": 1.0712257577961286,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 19058
+    },
+    {
+      "epoch": 0.19059,
+      "grad_norm": 1.4011245776695986,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 19059
+    },
+    {
+      "epoch": 0.1906,
+      "grad_norm": 1.2311881845460202,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 19060
+    },
+    {
+      "epoch": 0.19061,
+      "grad_norm": 1.2361009574500925,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 19061
+    },
+    {
+      "epoch": 0.19062,
+      "grad_norm": 1.3105780132626885,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 19062
+    },
+    {
+      "epoch": 0.19063,
+      "grad_norm": 1.51076336641986,
+      "learning_rate": 0.003,
+      "loss": 3.9832,
+      "step": 19063
+    },
+    {
+      "epoch": 0.19064,
+      "grad_norm": 1.2088524107157208,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 19064
+    },
+    {
+      "epoch": 0.19065,
+      "grad_norm": 1.3515938995121695,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 19065
+    },
+    {
+      "epoch": 0.19066,
+      "grad_norm": 1.0579345253167882,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 19066
+    },
+    {
+      "epoch": 0.19067,
+      "grad_norm": 1.4516672303751375,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 19067
+    },
+    {
+      "epoch": 0.19068,
+      "grad_norm": 1.1772943233256876,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 19068
+    },
+    {
+      "epoch": 0.19069,
+      "grad_norm": 1.5328311320810433,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 19069
+    },
+    {
+      "epoch": 0.1907,
+      "grad_norm": 1.1915204025435355,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 19070
+    },
+    {
+      "epoch": 0.19071,
+      "grad_norm": 1.3495335972107791,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 19071
+    },
+    {
+      "epoch": 0.19072,
+      "grad_norm": 1.2414979495680432,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 19072
+    },
+    {
+      "epoch": 0.19073,
+      "grad_norm": 1.269750840991254,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 19073
+    },
+    {
+      "epoch": 0.19074,
+      "grad_norm": 1.4034087624890401,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 19074
+    },
+    {
+      "epoch": 0.19075,
+      "grad_norm": 1.2755701853905599,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 19075
+    },
+    {
+      "epoch": 0.19076,
+      "grad_norm": 1.512881324709014,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 19076
+    },
+    {
+      "epoch": 0.19077,
+      "grad_norm": 1.2175458756706254,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 19077
+    },
+    {
+      "epoch": 0.19078,
+      "grad_norm": 1.2000797356856625,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 19078
+    },
+    {
+      "epoch": 0.19079,
+      "grad_norm": 1.2520113978678868,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 19079
+    },
+    {
+      "epoch": 0.1908,
+      "grad_norm": 1.2481163934673374,
+      "learning_rate": 0.003,
+      "loss": 3.9823,
+      "step": 19080
+    },
+    {
+      "epoch": 0.19081,
+      "grad_norm": 1.097364474128546,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 19081
+    },
+    {
+      "epoch": 0.19082,
+      "grad_norm": 1.4441792748123627,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 19082
+    },
+    {
+      "epoch": 0.19083,
+      "grad_norm": 1.125776578626099,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 19083
+    },
+    {
+      "epoch": 0.19084,
+      "grad_norm": 1.3143842737096847,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 19084
+    },
+    {
+      "epoch": 0.19085,
+      "grad_norm": 1.2196448496160888,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 19085
+    },
+    {
+      "epoch": 0.19086,
+      "grad_norm": 1.288034172323442,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 19086
+    },
+    {
+      "epoch": 0.19087,
+      "grad_norm": 1.4578452226265926,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 19087
+    },
+    {
+      "epoch": 0.19088,
+      "grad_norm": 1.2634694453509527,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 19088
+    },
+    {
+      "epoch": 0.19089,
+      "grad_norm": 1.387319508923809,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 19089
+    },
+    {
+      "epoch": 0.1909,
+      "grad_norm": 1.0366517034021514,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 19090
+    },
+    {
+      "epoch": 0.19091,
+      "grad_norm": 1.333378890493789,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 19091
+    },
+    {
+      "epoch": 0.19092,
+      "grad_norm": 1.2065461316222208,
+      "learning_rate": 0.003,
+      "loss": 3.9834,
+      "step": 19092
+    },
+    {
+      "epoch": 0.19093,
+      "grad_norm": 1.1718258809234923,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 19093
+    },
+    {
+      "epoch": 0.19094,
+      "grad_norm": 1.2415490969737364,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 19094
+    },
+    {
+      "epoch": 0.19095,
+      "grad_norm": 1.4227188221372549,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 19095
+    },
+    {
+      "epoch": 0.19096,
+      "grad_norm": 1.0988265640686208,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 19096
+    },
+    {
+      "epoch": 0.19097,
+      "grad_norm": 1.4368684260942137,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 19097
+    },
+    {
+      "epoch": 0.19098,
+      "grad_norm": 1.1405292634814344,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 19098
+    },
+    {
+      "epoch": 0.19099,
+      "grad_norm": 1.4162693006948381,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 19099
+    },
+    {
+      "epoch": 0.191,
+      "grad_norm": 1.2545115734304448,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 19100
+    },
+    {
+      "epoch": 0.19101,
+      "grad_norm": 1.6715304163657492,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 19101
+    },
+    {
+      "epoch": 0.19102,
+      "grad_norm": 0.8742404889241492,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 19102
+    },
+    {
+      "epoch": 0.19103,
+      "grad_norm": 1.2582071845065315,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 19103
+    },
+    {
+      "epoch": 0.19104,
+      "grad_norm": 1.3913650666631558,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 19104
+    },
+    {
+      "epoch": 0.19105,
+      "grad_norm": 1.1984116785798966,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 19105
+    },
+    {
+      "epoch": 0.19106,
+      "grad_norm": 1.246416613537958,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 19106
+    },
+    {
+      "epoch": 0.19107,
+      "grad_norm": 1.4743461074757365,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 19107
+    },
+    {
+      "epoch": 0.19108,
+      "grad_norm": 1.0219739765859523,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 19108
+    },
+    {
+      "epoch": 0.19109,
+      "grad_norm": 1.2434644790047842,
+      "learning_rate": 0.003,
+      "loss": 4.0411,
+      "step": 19109
+    },
+    {
+      "epoch": 0.1911,
+      "grad_norm": 0.9530127222735694,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 19110
+    },
+    {
+      "epoch": 0.19111,
+      "grad_norm": 1.4168432581056025,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 19111
+    },
+    {
+      "epoch": 0.19112,
+      "grad_norm": 1.1428098216809432,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 19112
+    },
+    {
+      "epoch": 0.19113,
+      "grad_norm": 1.2447098168335564,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 19113
+    },
+    {
+      "epoch": 0.19114,
+      "grad_norm": 1.3315043841441578,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 19114
+    },
+    {
+      "epoch": 0.19115,
+      "grad_norm": 1.354507672850012,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 19115
+    },
+    {
+      "epoch": 0.19116,
+      "grad_norm": 1.3139660086367373,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 19116
+    },
+    {
+      "epoch": 0.19117,
+      "grad_norm": 1.2763730508264501,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 19117
+    },
+    {
+      "epoch": 0.19118,
+      "grad_norm": 1.374380572006979,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 19118
+    },
+    {
+      "epoch": 0.19119,
+      "grad_norm": 1.2393991245690554,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 19119
+    },
+    {
+      "epoch": 0.1912,
+      "grad_norm": 1.3235895655843752,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 19120
+    },
+    {
+      "epoch": 0.19121,
+      "grad_norm": 1.2299513388331191,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 19121
+    },
+    {
+      "epoch": 0.19122,
+      "grad_norm": 1.3009696269716826,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 19122
+    },
+    {
+      "epoch": 0.19123,
+      "grad_norm": 1.059433727540707,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 19123
+    },
+    {
+      "epoch": 0.19124,
+      "grad_norm": 1.474212392176231,
+      "learning_rate": 0.003,
+      "loss": 4.0462,
+      "step": 19124
+    },
+    {
+      "epoch": 0.19125,
+      "grad_norm": 1.2204828187740557,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 19125
+    },
+    {
+      "epoch": 0.19126,
+      "grad_norm": 1.453210903186638,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 19126
+    },
+    {
+      "epoch": 0.19127,
+      "grad_norm": 1.1575091883887447,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 19127
+    },
+    {
+      "epoch": 0.19128,
+      "grad_norm": 1.2448048390497166,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 19128
+    },
+    {
+      "epoch": 0.19129,
+      "grad_norm": 1.2071617706827724,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 19129
+    },
+    {
+      "epoch": 0.1913,
+      "grad_norm": 1.6016710999356514,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 19130
+    },
+    {
+      "epoch": 0.19131,
+      "grad_norm": 1.0632827321435563,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 19131
+    },
+    {
+      "epoch": 0.19132,
+      "grad_norm": 1.552070821326928,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 19132
+    },
+    {
+      "epoch": 0.19133,
+      "grad_norm": 1.1922010195109165,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 19133
+    },
+    {
+      "epoch": 0.19134,
+      "grad_norm": 1.3765320597846145,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 19134
+    },
+    {
+      "epoch": 0.19135,
+      "grad_norm": 1.0739736706773755,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 19135
+    },
+    {
+      "epoch": 0.19136,
+      "grad_norm": 1.6620003415004323,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 19136
+    },
+    {
+      "epoch": 0.19137,
+      "grad_norm": 1.3351747659310116,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 19137
+    },
+    {
+      "epoch": 0.19138,
+      "grad_norm": 1.2484793825417677,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 19138
+    },
+    {
+      "epoch": 0.19139,
+      "grad_norm": 1.2464807653395094,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 19139
+    },
+    {
+      "epoch": 0.1914,
+      "grad_norm": 1.4173178457188202,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 19140
+    },
+    {
+      "epoch": 0.19141,
+      "grad_norm": 0.9643740345222206,
+      "learning_rate": 0.003,
+      "loss": 3.965,
+      "step": 19141
+    },
+    {
+      "epoch": 0.19142,
+      "grad_norm": 1.3677467675330857,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 19142
+    },
+    {
+      "epoch": 0.19143,
+      "grad_norm": 1.0828147627805695,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 19143
+    },
+    {
+      "epoch": 0.19144,
+      "grad_norm": 1.3952086354765336,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 19144
+    },
+    {
+      "epoch": 0.19145,
+      "grad_norm": 1.1570067587384936,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 19145
+    },
+    {
+      "epoch": 0.19146,
+      "grad_norm": 1.3287517074229556,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 19146
+    },
+    {
+      "epoch": 0.19147,
+      "grad_norm": 1.2316935422853907,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 19147
+    },
+    {
+      "epoch": 0.19148,
+      "grad_norm": 1.2360412924777142,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 19148
+    },
+    {
+      "epoch": 0.19149,
+      "grad_norm": 1.2389278017039622,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 19149
+    },
+    {
+      "epoch": 0.1915,
+      "grad_norm": 1.3070126745412278,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 19150
+    },
+    {
+      "epoch": 0.19151,
+      "grad_norm": 1.185173442606673,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 19151
+    },
+    {
+      "epoch": 0.19152,
+      "grad_norm": 1.416980485009476,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 19152
+    },
+    {
+      "epoch": 0.19153,
+      "grad_norm": 1.2751232745991887,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 19153
+    },
+    {
+      "epoch": 0.19154,
+      "grad_norm": 1.110960654854003,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 19154
+    },
+    {
+      "epoch": 0.19155,
+      "grad_norm": 1.4027469585522632,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 19155
+    },
+    {
+      "epoch": 0.19156,
+      "grad_norm": 1.0065539903176461,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 19156
+    },
+    {
+      "epoch": 0.19157,
+      "grad_norm": 1.41483538295321,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 19157
+    },
+    {
+      "epoch": 0.19158,
+      "grad_norm": 1.001659705392178,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 19158
+    },
+    {
+      "epoch": 0.19159,
+      "grad_norm": 1.4993941788895317,
+      "learning_rate": 0.003,
+      "loss": 4.0447,
+      "step": 19159
+    },
+    {
+      "epoch": 0.1916,
+      "grad_norm": 1.1235648363683046,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 19160
+    },
+    {
+      "epoch": 0.19161,
+      "grad_norm": 1.3724535910875315,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 19161
+    },
+    {
+      "epoch": 0.19162,
+      "grad_norm": 1.36784821779161,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 19162
+    },
+    {
+      "epoch": 0.19163,
+      "grad_norm": 1.3440725887703517,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 19163
+    },
+    {
+      "epoch": 0.19164,
+      "grad_norm": 1.4003114616786319,
+      "learning_rate": 0.003,
+      "loss": 4.0672,
+      "step": 19164
+    },
+    {
+      "epoch": 0.19165,
+      "grad_norm": 1.3456911369524323,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 19165
+    },
+    {
+      "epoch": 0.19166,
+      "grad_norm": 1.4166533169352453,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 19166
+    },
+    {
+      "epoch": 0.19167,
+      "grad_norm": 1.1692066530140661,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 19167
+    },
+    {
+      "epoch": 0.19168,
+      "grad_norm": 1.3822989774052477,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 19168
+    },
+    {
+      "epoch": 0.19169,
+      "grad_norm": 1.2704797674341173,
+      "learning_rate": 0.003,
+      "loss": 3.9759,
+      "step": 19169
+    },
+    {
+      "epoch": 0.1917,
+      "grad_norm": 1.3365082189616249,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 19170
+    },
+    {
+      "epoch": 0.19171,
+      "grad_norm": 1.2556939762253736,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 19171
+    },
+    {
+      "epoch": 0.19172,
+      "grad_norm": 1.3519804182425261,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 19172
+    },
+    {
+      "epoch": 0.19173,
+      "grad_norm": 1.5667146814140536,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 19173
+    },
+    {
+      "epoch": 0.19174,
+      "grad_norm": 1.1995668269706874,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 19174
+    },
+    {
+      "epoch": 0.19175,
+      "grad_norm": 1.3706076713201705,
+      "learning_rate": 0.003,
+      "loss": 4.0491,
+      "step": 19175
+    },
+    {
+      "epoch": 0.19176,
+      "grad_norm": 1.3617926014182877,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 19176
+    },
+    {
+      "epoch": 0.19177,
+      "grad_norm": 0.9739772235950447,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 19177
+    },
+    {
+      "epoch": 0.19178,
+      "grad_norm": 1.3137040240032942,
+      "learning_rate": 0.003,
+      "loss": 3.9893,
+      "step": 19178
+    },
+    {
+      "epoch": 0.19179,
+      "grad_norm": 1.0656757316187295,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 19179
+    },
+    {
+      "epoch": 0.1918,
+      "grad_norm": 1.4942245097279454,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 19180
+    },
+    {
+      "epoch": 0.19181,
+      "grad_norm": 1.1320305859837994,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 19181
+    },
+    {
+      "epoch": 0.19182,
+      "grad_norm": 1.4395231089438183,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 19182
+    },
+    {
+      "epoch": 0.19183,
+      "grad_norm": 1.142047464237548,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 19183
+    },
+    {
+      "epoch": 0.19184,
+      "grad_norm": 1.5417366767841711,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 19184
+    },
+    {
+      "epoch": 0.19185,
+      "grad_norm": 1.0549236655877086,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 19185
+    },
+    {
+      "epoch": 0.19186,
+      "grad_norm": 1.338104735227698,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 19186
+    },
+    {
+      "epoch": 0.19187,
+      "grad_norm": 1.0440395117932924,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 19187
+    },
+    {
+      "epoch": 0.19188,
+      "grad_norm": 1.3699105649139216,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 19188
+    },
+    {
+      "epoch": 0.19189,
+      "grad_norm": 1.175755284923481,
+      "learning_rate": 0.003,
+      "loss": 3.9847,
+      "step": 19189
+    },
+    {
+      "epoch": 0.1919,
+      "grad_norm": 1.3566147152322237,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 19190
+    },
+    {
+      "epoch": 0.19191,
+      "grad_norm": 1.0789372066498049,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 19191
+    },
+    {
+      "epoch": 0.19192,
+      "grad_norm": 1.6305106667821463,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 19192
+    },
+    {
+      "epoch": 0.19193,
+      "grad_norm": 1.0948163952198668,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 19193
+    },
+    {
+      "epoch": 0.19194,
+      "grad_norm": 1.472180439039528,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 19194
+    },
+    {
+      "epoch": 0.19195,
+      "grad_norm": 1.2227345384907626,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 19195
+    },
+    {
+      "epoch": 0.19196,
+      "grad_norm": 1.2583958450071178,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 19196
+    },
+    {
+      "epoch": 0.19197,
+      "grad_norm": 1.3196859367580656,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 19197
+    },
+    {
+      "epoch": 0.19198,
+      "grad_norm": 1.3452850973410988,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 19198
+    },
+    {
+      "epoch": 0.19199,
+      "grad_norm": 1.315591325434402,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 19199
+    },
+    {
+      "epoch": 0.192,
+      "grad_norm": 1.2418495760786068,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 19200
+    },
+    {
+      "epoch": 0.19201,
+      "grad_norm": 1.4964553939632956,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 19201
+    },
+    {
+      "epoch": 0.19202,
+      "grad_norm": 1.0893225211336188,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 19202
+    },
+    {
+      "epoch": 0.19203,
+      "grad_norm": 1.5751153952221577,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 19203
+    },
+    {
+      "epoch": 0.19204,
+      "grad_norm": 1.0211644191229647,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 19204
+    },
+    {
+      "epoch": 0.19205,
+      "grad_norm": 1.668758426441642,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 19205
+    },
+    {
+      "epoch": 0.19206,
+      "grad_norm": 1.103866066916637,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 19206
+    },
+    {
+      "epoch": 0.19207,
+      "grad_norm": 1.3708470161547623,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 19207
+    },
+    {
+      "epoch": 0.19208,
+      "grad_norm": 1.336346006480904,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 19208
+    },
+    {
+      "epoch": 0.19209,
+      "grad_norm": 1.383493502088608,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 19209
+    },
+    {
+      "epoch": 0.1921,
+      "grad_norm": 1.3250300138240707,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 19210
+    },
+    {
+      "epoch": 0.19211,
+      "grad_norm": 1.3608835404115982,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 19211
+    },
+    {
+      "epoch": 0.19212,
+      "grad_norm": 1.1279182095367095,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 19212
+    },
+    {
+      "epoch": 0.19213,
+      "grad_norm": 1.4042606967821825,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 19213
+    },
+    {
+      "epoch": 0.19214,
+      "grad_norm": 1.3921242305492765,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 19214
+    },
+    {
+      "epoch": 0.19215,
+      "grad_norm": 1.393570227298357,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 19215
+    },
+    {
+      "epoch": 0.19216,
+      "grad_norm": 0.9400308970698311,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 19216
+    },
+    {
+      "epoch": 0.19217,
+      "grad_norm": 1.4028572447373904,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 19217
+    },
+    {
+      "epoch": 0.19218,
+      "grad_norm": 1.3477680450135243,
+      "learning_rate": 0.003,
+      "loss": 4.0583,
+      "step": 19218
+    },
+    {
+      "epoch": 0.19219,
+      "grad_norm": 1.2722475975697576,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 19219
+    },
+    {
+      "epoch": 0.1922,
+      "grad_norm": 1.214134070008866,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 19220
+    },
+    {
+      "epoch": 0.19221,
+      "grad_norm": 1.2766818349375064,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 19221
+    },
+    {
+      "epoch": 0.19222,
+      "grad_norm": 1.0227775904333438,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 19222
+    },
+    {
+      "epoch": 0.19223,
+      "grad_norm": 1.390055037679002,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 19223
+    },
+    {
+      "epoch": 0.19224,
+      "grad_norm": 1.1565366615058736,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 19224
+    },
+    {
+      "epoch": 0.19225,
+      "grad_norm": 1.2794929776760933,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 19225
+    },
+    {
+      "epoch": 0.19226,
+      "grad_norm": 1.1353271404954286,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 19226
+    },
+    {
+      "epoch": 0.19227,
+      "grad_norm": 1.6572333512175204,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 19227
+    },
+    {
+      "epoch": 0.19228,
+      "grad_norm": 1.2872637606958557,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 19228
+    },
+    {
+      "epoch": 0.19229,
+      "grad_norm": 1.1422414048207101,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 19229
+    },
+    {
+      "epoch": 0.1923,
+      "grad_norm": 1.1905627134617205,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 19230
+    },
+    {
+      "epoch": 0.19231,
+      "grad_norm": 1.1327984887160192,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 19231
+    },
+    {
+      "epoch": 0.19232,
+      "grad_norm": 1.3500475230168787,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 19232
+    },
+    {
+      "epoch": 0.19233,
+      "grad_norm": 1.3342412111618422,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 19233
+    },
+    {
+      "epoch": 0.19234,
+      "grad_norm": 1.3479954438718722,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 19234
+    },
+    {
+      "epoch": 0.19235,
+      "grad_norm": 1.0170260069549755,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 19235
+    },
+    {
+      "epoch": 0.19236,
+      "grad_norm": 1.6278668162468681,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 19236
+    },
+    {
+      "epoch": 0.19237,
+      "grad_norm": 1.1504919423184614,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 19237
+    },
+    {
+      "epoch": 0.19238,
+      "grad_norm": 1.1420386065591528,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 19238
+    },
+    {
+      "epoch": 0.19239,
+      "grad_norm": 1.0549790580222767,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 19239
+    },
+    {
+      "epoch": 0.1924,
+      "grad_norm": 1.5305591571558188,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 19240
+    },
+    {
+      "epoch": 0.19241,
+      "grad_norm": 1.1568374311249705,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 19241
+    },
+    {
+      "epoch": 0.19242,
+      "grad_norm": 1.352353603934135,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 19242
+    },
+    {
+      "epoch": 0.19243,
+      "grad_norm": 1.3056422729112167,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 19243
+    },
+    {
+      "epoch": 0.19244,
+      "grad_norm": 1.362731783574408,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 19244
+    },
+    {
+      "epoch": 0.19245,
+      "grad_norm": 1.2971847907997471,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 19245
+    },
+    {
+      "epoch": 0.19246,
+      "grad_norm": 1.1517573405002746,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 19246
+    },
+    {
+      "epoch": 0.19247,
+      "grad_norm": 1.5410466636651956,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 19247
+    },
+    {
+      "epoch": 0.19248,
+      "grad_norm": 1.1954775748984008,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 19248
+    },
+    {
+      "epoch": 0.19249,
+      "grad_norm": 1.5128546993397118,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 19249
+    },
+    {
+      "epoch": 0.1925,
+      "grad_norm": 1.0842559383860007,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 19250
+    },
+    {
+      "epoch": 0.19251,
+      "grad_norm": 1.4291439821743894,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 19251
+    },
+    {
+      "epoch": 0.19252,
+      "grad_norm": 1.0421556628940751,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 19252
+    },
+    {
+      "epoch": 0.19253,
+      "grad_norm": 1.2270565387256347,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 19253
+    },
+    {
+      "epoch": 0.19254,
+      "grad_norm": 1.3110699699621147,
+      "learning_rate": 0.003,
+      "loss": 4.0642,
+      "step": 19254
+    },
+    {
+      "epoch": 0.19255,
+      "grad_norm": 1.5660459777797888,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 19255
+    },
+    {
+      "epoch": 0.19256,
+      "grad_norm": 1.12781204659167,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 19256
+    },
+    {
+      "epoch": 0.19257,
+      "grad_norm": 1.3680962997750865,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 19257
+    },
+    {
+      "epoch": 0.19258,
+      "grad_norm": 1.2246106606785605,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 19258
+    },
+    {
+      "epoch": 0.19259,
+      "grad_norm": 1.3711940179038715,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 19259
+    },
+    {
+      "epoch": 0.1926,
+      "grad_norm": 1.249101378903422,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 19260
+    },
+    {
+      "epoch": 0.19261,
+      "grad_norm": 1.2695393115142042,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 19261
+    },
+    {
+      "epoch": 0.19262,
+      "grad_norm": 1.096150624265063,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 19262
+    },
+    {
+      "epoch": 0.19263,
+      "grad_norm": 1.4421643212070427,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 19263
+    },
+    {
+      "epoch": 0.19264,
+      "grad_norm": 1.0592830510385476,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 19264
+    },
+    {
+      "epoch": 0.19265,
+      "grad_norm": 1.3993379394834091,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 19265
+    },
+    {
+      "epoch": 0.19266,
+      "grad_norm": 1.2327693654016243,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 19266
+    },
+    {
+      "epoch": 0.19267,
+      "grad_norm": 1.3455571063722753,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 19267
+    },
+    {
+      "epoch": 0.19268,
+      "grad_norm": 1.1314260480703089,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 19268
+    },
+    {
+      "epoch": 0.19269,
+      "grad_norm": 1.1538960933574993,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 19269
+    },
+    {
+      "epoch": 0.1927,
+      "grad_norm": 1.187131626358708,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 19270
+    },
+    {
+      "epoch": 0.19271,
+      "grad_norm": 1.1436148944196758,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 19271
+    },
+    {
+      "epoch": 0.19272,
+      "grad_norm": 1.3829166159388213,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 19272
+    },
+    {
+      "epoch": 0.19273,
+      "grad_norm": 1.1531695855934014,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 19273
+    },
+    {
+      "epoch": 0.19274,
+      "grad_norm": 1.4169055337103866,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 19274
+    },
+    {
+      "epoch": 0.19275,
+      "grad_norm": 1.3208893080744692,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 19275
+    },
+    {
+      "epoch": 0.19276,
+      "grad_norm": 1.2189686284074917,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 19276
+    },
+    {
+      "epoch": 0.19277,
+      "grad_norm": 1.1685162884892721,
+      "learning_rate": 0.003,
+      "loss": 4.0451,
+      "step": 19277
+    },
+    {
+      "epoch": 0.19278,
+      "grad_norm": 1.3097156211266974,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 19278
+    },
+    {
+      "epoch": 0.19279,
+      "grad_norm": 1.5822975034533577,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 19279
+    },
+    {
+      "epoch": 0.1928,
+      "grad_norm": 1.018259879922634,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 19280
+    },
+    {
+      "epoch": 0.19281,
+      "grad_norm": 1.6871851030896705,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 19281
+    },
+    {
+      "epoch": 0.19282,
+      "grad_norm": 1.158369794297749,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 19282
+    },
+    {
+      "epoch": 0.19283,
+      "grad_norm": 1.565967294109266,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 19283
+    },
+    {
+      "epoch": 0.19284,
+      "grad_norm": 1.3313926512220093,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 19284
+    },
+    {
+      "epoch": 0.19285,
+      "grad_norm": 1.1636005686550623,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 19285
+    },
+    {
+      "epoch": 0.19286,
+      "grad_norm": 1.2197703555039798,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 19286
+    },
+    {
+      "epoch": 0.19287,
+      "grad_norm": 1.221503826165908,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 19287
+    },
+    {
+      "epoch": 0.19288,
+      "grad_norm": 1.1929165705254783,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 19288
+    },
+    {
+      "epoch": 0.19289,
+      "grad_norm": 1.3241772377858156,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 19289
+    },
+    {
+      "epoch": 0.1929,
+      "grad_norm": 1.273938892976147,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 19290
+    },
+    {
+      "epoch": 0.19291,
+      "grad_norm": 1.3370810312539407,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 19291
+    },
+    {
+      "epoch": 0.19292,
+      "grad_norm": 1.2271010108769684,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 19292
+    },
+    {
+      "epoch": 0.19293,
+      "grad_norm": 1.401432219915392,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 19293
+    },
+    {
+      "epoch": 0.19294,
+      "grad_norm": 1.2556446549493423,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 19294
+    },
+    {
+      "epoch": 0.19295,
+      "grad_norm": 1.44739459099722,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 19295
+    },
+    {
+      "epoch": 0.19296,
+      "grad_norm": 1.1923867938466919,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 19296
+    },
+    {
+      "epoch": 0.19297,
+      "grad_norm": 1.2444243812318136,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 19297
+    },
+    {
+      "epoch": 0.19298,
+      "grad_norm": 0.91060215731962,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 19298
+    },
+    {
+      "epoch": 0.19299,
+      "grad_norm": 1.1796978616990044,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 19299
+    },
+    {
+      "epoch": 0.193,
+      "grad_norm": 1.3100247823203226,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 19300
+    },
+    {
+      "epoch": 0.19301,
+      "grad_norm": 1.2632705627470568,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 19301
+    },
+    {
+      "epoch": 0.19302,
+      "grad_norm": 1.3130883423963529,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 19302
+    },
+    {
+      "epoch": 0.19303,
+      "grad_norm": 1.2941841053702245,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 19303
+    },
+    {
+      "epoch": 0.19304,
+      "grad_norm": 1.3912468915674194,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 19304
+    },
+    {
+      "epoch": 0.19305,
+      "grad_norm": 1.2439496071376583,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 19305
+    },
+    {
+      "epoch": 0.19306,
+      "grad_norm": 1.1121497476990196,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 19306
+    },
+    {
+      "epoch": 0.19307,
+      "grad_norm": 1.385475619134274,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 19307
+    },
+    {
+      "epoch": 0.19308,
+      "grad_norm": 1.2148891995840227,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 19308
+    },
+    {
+      "epoch": 0.19309,
+      "grad_norm": 1.2879980153597395,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 19309
+    },
+    {
+      "epoch": 0.1931,
+      "grad_norm": 1.140076015231091,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 19310
+    },
+    {
+      "epoch": 0.19311,
+      "grad_norm": 1.3173011843941163,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 19311
+    },
+    {
+      "epoch": 0.19312,
+      "grad_norm": 1.187476127872916,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 19312
+    },
+    {
+      "epoch": 0.19313,
+      "grad_norm": 1.1974913089766392,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 19313
+    },
+    {
+      "epoch": 0.19314,
+      "grad_norm": 1.6268923293114181,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 19314
+    },
+    {
+      "epoch": 0.19315,
+      "grad_norm": 1.406719694155138,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 19315
+    },
+    {
+      "epoch": 0.19316,
+      "grad_norm": 1.362298849192877,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 19316
+    },
+    {
+      "epoch": 0.19317,
+      "grad_norm": 1.3894069406527816,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 19317
+    },
+    {
+      "epoch": 0.19318,
+      "grad_norm": 1.2271526499579337,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 19318
+    },
+    {
+      "epoch": 0.19319,
+      "grad_norm": 1.3715632264623012,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 19319
+    },
+    {
+      "epoch": 0.1932,
+      "grad_norm": 1.3500772463535817,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 19320
+    },
+    {
+      "epoch": 0.19321,
+      "grad_norm": 1.2945282600052037,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 19321
+    },
+    {
+      "epoch": 0.19322,
+      "grad_norm": 1.4278396415481247,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 19322
+    },
+    {
+      "epoch": 0.19323,
+      "grad_norm": 1.2810364318809215,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 19323
+    },
+    {
+      "epoch": 0.19324,
+      "grad_norm": 1.101442820195114,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 19324
+    },
+    {
+      "epoch": 0.19325,
+      "grad_norm": 1.5124095291132174,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 19325
+    },
+    {
+      "epoch": 0.19326,
+      "grad_norm": 1.1142550774717075,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 19326
+    },
+    {
+      "epoch": 0.19327,
+      "grad_norm": 1.296235807624217,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 19327
+    },
+    {
+      "epoch": 0.19328,
+      "grad_norm": 1.2190174813937023,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 19328
+    },
+    {
+      "epoch": 0.19329,
+      "grad_norm": 1.1809580335134242,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 19329
+    },
+    {
+      "epoch": 0.1933,
+      "grad_norm": 1.3725895910679775,
+      "learning_rate": 0.003,
+      "loss": 3.9799,
+      "step": 19330
+    },
+    {
+      "epoch": 0.19331,
+      "grad_norm": 1.198982506010596,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 19331
+    },
+    {
+      "epoch": 0.19332,
+      "grad_norm": 1.634412887922178,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 19332
+    },
+    {
+      "epoch": 0.19333,
+      "grad_norm": 1.1838386916950983,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 19333
+    },
+    {
+      "epoch": 0.19334,
+      "grad_norm": 1.4986024886368075,
+      "learning_rate": 0.003,
+      "loss": 3.9751,
+      "step": 19334
+    },
+    {
+      "epoch": 0.19335,
+      "grad_norm": 1.0251073572126714,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 19335
+    },
+    {
+      "epoch": 0.19336,
+      "grad_norm": 1.3733329145635225,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 19336
+    },
+    {
+      "epoch": 0.19337,
+      "grad_norm": 1.133530270084721,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 19337
+    },
+    {
+      "epoch": 0.19338,
+      "grad_norm": 1.371493307962086,
+      "learning_rate": 0.003,
+      "loss": 4.044,
+      "step": 19338
+    },
+    {
+      "epoch": 0.19339,
+      "grad_norm": 1.2661376845460801,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 19339
+    },
+    {
+      "epoch": 0.1934,
+      "grad_norm": 1.0460255834033878,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 19340
+    },
+    {
+      "epoch": 0.19341,
+      "grad_norm": 1.3474658997520463,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 19341
+    },
+    {
+      "epoch": 0.19342,
+      "grad_norm": 1.2328918139681768,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 19342
+    },
+    {
+      "epoch": 0.19343,
+      "grad_norm": 1.5557496199792455,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 19343
+    },
+    {
+      "epoch": 0.19344,
+      "grad_norm": 1.0805207725066504,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 19344
+    },
+    {
+      "epoch": 0.19345,
+      "grad_norm": 1.5483762002292565,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 19345
+    },
+    {
+      "epoch": 0.19346,
+      "grad_norm": 1.1135645585311766,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 19346
+    },
+    {
+      "epoch": 0.19347,
+      "grad_norm": 1.5861363232816243,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 19347
+    },
+    {
+      "epoch": 0.19348,
+      "grad_norm": 1.2569767841890287,
+      "learning_rate": 0.003,
+      "loss": 4.0608,
+      "step": 19348
+    },
+    {
+      "epoch": 0.19349,
+      "grad_norm": 1.605713062875422,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 19349
+    },
+    {
+      "epoch": 0.1935,
+      "grad_norm": 1.0715630148323743,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 19350
+    },
+    {
+      "epoch": 0.19351,
+      "grad_norm": 1.4160353881138203,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 19351
+    },
+    {
+      "epoch": 0.19352,
+      "grad_norm": 1.2151238401699003,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 19352
+    },
+    {
+      "epoch": 0.19353,
+      "grad_norm": 1.4276087862318694,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 19353
+    },
+    {
+      "epoch": 0.19354,
+      "grad_norm": 1.2691430926073761,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 19354
+    },
+    {
+      "epoch": 0.19355,
+      "grad_norm": 1.2687363991948806,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 19355
+    },
+    {
+      "epoch": 0.19356,
+      "grad_norm": 1.354969634174041,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 19356
+    },
+    {
+      "epoch": 0.19357,
+      "grad_norm": 1.2651970202661926,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 19357
+    },
+    {
+      "epoch": 0.19358,
+      "grad_norm": 1.1018213384961606,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 19358
+    },
+    {
+      "epoch": 0.19359,
+      "grad_norm": 1.5157626114657716,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 19359
+    },
+    {
+      "epoch": 0.1936,
+      "grad_norm": 0.9785199398680243,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 19360
+    },
+    {
+      "epoch": 0.19361,
+      "grad_norm": 1.356334889201725,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 19361
+    },
+    {
+      "epoch": 0.19362,
+      "grad_norm": 1.3266985654743382,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 19362
+    },
+    {
+      "epoch": 0.19363,
+      "grad_norm": 1.2479698389453084,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 19363
+    },
+    {
+      "epoch": 0.19364,
+      "grad_norm": 1.3392493507431489,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 19364
+    },
+    {
+      "epoch": 0.19365,
+      "grad_norm": 1.2448826266360082,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 19365
+    },
+    {
+      "epoch": 0.19366,
+      "grad_norm": 1.387785717629832,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 19366
+    },
+    {
+      "epoch": 0.19367,
+      "grad_norm": 1.137768309523138,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 19367
+    },
+    {
+      "epoch": 0.19368,
+      "grad_norm": 1.3985451664357438,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 19368
+    },
+    {
+      "epoch": 0.19369,
+      "grad_norm": 1.0230222402651474,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 19369
+    },
+    {
+      "epoch": 0.1937,
+      "grad_norm": 1.7043342738804743,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 19370
+    },
+    {
+      "epoch": 0.19371,
+      "grad_norm": 1.070731980335974,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 19371
+    },
+    {
+      "epoch": 0.19372,
+      "grad_norm": 1.5989963870136723,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 19372
+    },
+    {
+      "epoch": 0.19373,
+      "grad_norm": 1.0894608289672285,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 19373
+    },
+    {
+      "epoch": 0.19374,
+      "grad_norm": 1.3492437278162377,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 19374
+    },
+    {
+      "epoch": 0.19375,
+      "grad_norm": 1.1630825670623925,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 19375
+    },
+    {
+      "epoch": 0.19376,
+      "grad_norm": 1.1985445170942122,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 19376
+    },
+    {
+      "epoch": 0.19377,
+      "grad_norm": 1.431249364617067,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 19377
+    },
+    {
+      "epoch": 0.19378,
+      "grad_norm": 1.1379792044498334,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 19378
+    },
+    {
+      "epoch": 0.19379,
+      "grad_norm": 1.2705943922095848,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 19379
+    },
+    {
+      "epoch": 0.1938,
+      "grad_norm": 1.3692326187936688,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 19380
+    },
+    {
+      "epoch": 0.19381,
+      "grad_norm": 1.3105594531707963,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 19381
+    },
+    {
+      "epoch": 0.19382,
+      "grad_norm": 1.2756647051736087,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 19382
+    },
+    {
+      "epoch": 0.19383,
+      "grad_norm": 1.203834054031465,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 19383
+    },
+    {
+      "epoch": 0.19384,
+      "grad_norm": 1.3869501534613795,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 19384
+    },
+    {
+      "epoch": 0.19385,
+      "grad_norm": 1.3912330577028547,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 19385
+    },
+    {
+      "epoch": 0.19386,
+      "grad_norm": 1.3352454715041207,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 19386
+    },
+    {
+      "epoch": 0.19387,
+      "grad_norm": 1.1876484651274395,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 19387
+    },
+    {
+      "epoch": 0.19388,
+      "grad_norm": 1.3867399137431806,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 19388
+    },
+    {
+      "epoch": 0.19389,
+      "grad_norm": 1.3255095596981117,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 19389
+    },
+    {
+      "epoch": 0.1939,
+      "grad_norm": 1.137809374341177,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 19390
+    },
+    {
+      "epoch": 0.19391,
+      "grad_norm": 1.2396201656564152,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 19391
+    },
+    {
+      "epoch": 0.19392,
+      "grad_norm": 1.3626290110646824,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 19392
+    },
+    {
+      "epoch": 0.19393,
+      "grad_norm": 1.6009243795085146,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 19393
+    },
+    {
+      "epoch": 0.19394,
+      "grad_norm": 1.0666235314128802,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 19394
+    },
+    {
+      "epoch": 0.19395,
+      "grad_norm": 1.4558412032362964,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 19395
+    },
+    {
+      "epoch": 0.19396,
+      "grad_norm": 1.0651915583472482,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 19396
+    },
+    {
+      "epoch": 0.19397,
+      "grad_norm": 1.376489065756985,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 19397
+    },
+    {
+      "epoch": 0.19398,
+      "grad_norm": 1.0967506436851027,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 19398
+    },
+    {
+      "epoch": 0.19399,
+      "grad_norm": 1.4940525783469398,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 19399
+    },
+    {
+      "epoch": 0.194,
+      "grad_norm": 1.1738583514745011,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 19400
+    },
+    {
+      "epoch": 0.19401,
+      "grad_norm": 1.3791785058792716,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 19401
+    },
+    {
+      "epoch": 0.19402,
+      "grad_norm": 1.520343186080021,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 19402
+    },
+    {
+      "epoch": 0.19403,
+      "grad_norm": 1.0447586867419312,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 19403
+    },
+    {
+      "epoch": 0.19404,
+      "grad_norm": 1.3830076366544406,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 19404
+    },
+    {
+      "epoch": 0.19405,
+      "grad_norm": 1.1849306914145938,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 19405
+    },
+    {
+      "epoch": 0.19406,
+      "grad_norm": 1.3627366781248778,
+      "learning_rate": 0.003,
+      "loss": 3.9826,
+      "step": 19406
+    },
+    {
+      "epoch": 0.19407,
+      "grad_norm": 1.2633007026106786,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 19407
+    },
+    {
+      "epoch": 0.19408,
+      "grad_norm": 1.4176166967200712,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 19408
+    },
+    {
+      "epoch": 0.19409,
+      "grad_norm": 1.1810542121198504,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 19409
+    },
+    {
+      "epoch": 0.1941,
+      "grad_norm": 1.5117743625189217,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 19410
+    },
+    {
+      "epoch": 0.19411,
+      "grad_norm": 1.1576092758714918,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 19411
+    },
+    {
+      "epoch": 0.19412,
+      "grad_norm": 1.4484549629758336,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 19412
+    },
+    {
+      "epoch": 0.19413,
+      "grad_norm": 1.2382917950572578,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 19413
+    },
+    {
+      "epoch": 0.19414,
+      "grad_norm": 1.287689718271487,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 19414
+    },
+    {
+      "epoch": 0.19415,
+      "grad_norm": 1.0587472430188611,
+      "learning_rate": 0.003,
+      "loss": 3.9702,
+      "step": 19415
+    },
+    {
+      "epoch": 0.19416,
+      "grad_norm": 1.3551473074839804,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 19416
+    },
+    {
+      "epoch": 0.19417,
+      "grad_norm": 1.2534330169306258,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 19417
+    },
+    {
+      "epoch": 0.19418,
+      "grad_norm": 1.474397280636809,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 19418
+    },
+    {
+      "epoch": 0.19419,
+      "grad_norm": 1.2670921396395363,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 19419
+    },
+    {
+      "epoch": 0.1942,
+      "grad_norm": 1.261683865245845,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 19420
+    },
+    {
+      "epoch": 0.19421,
+      "grad_norm": 1.3606329519913305,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 19421
+    },
+    {
+      "epoch": 0.19422,
+      "grad_norm": 1.1639836108705277,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 19422
+    },
+    {
+      "epoch": 0.19423,
+      "grad_norm": 1.3157400088902118,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 19423
+    },
+    {
+      "epoch": 0.19424,
+      "grad_norm": 1.0331274223736815,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 19424
+    },
+    {
+      "epoch": 0.19425,
+      "grad_norm": 1.601486387553494,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 19425
+    },
+    {
+      "epoch": 0.19426,
+      "grad_norm": 1.3198023630087965,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 19426
+    },
+    {
+      "epoch": 0.19427,
+      "grad_norm": 1.2751671555010344,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 19427
+    },
+    {
+      "epoch": 0.19428,
+      "grad_norm": 1.096412697272269,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 19428
+    },
+    {
+      "epoch": 0.19429,
+      "grad_norm": 1.3770440911365562,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 19429
+    },
+    {
+      "epoch": 0.1943,
+      "grad_norm": 1.2006570626184592,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 19430
+    },
+    {
+      "epoch": 0.19431,
+      "grad_norm": 1.3108382106025203,
+      "learning_rate": 0.003,
+      "loss": 3.9786,
+      "step": 19431
+    },
+    {
+      "epoch": 0.19432,
+      "grad_norm": 1.051591616589315,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 19432
+    },
+    {
+      "epoch": 0.19433,
+      "grad_norm": 1.4436358146578554,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 19433
+    },
+    {
+      "epoch": 0.19434,
+      "grad_norm": 1.2370339867054645,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 19434
+    },
+    {
+      "epoch": 0.19435,
+      "grad_norm": 1.6023018189894631,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 19435
+    },
+    {
+      "epoch": 0.19436,
+      "grad_norm": 1.091354616003643,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 19436
+    },
+    {
+      "epoch": 0.19437,
+      "grad_norm": 1.388347882481318,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 19437
+    },
+    {
+      "epoch": 0.19438,
+      "grad_norm": 1.327826102133524,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 19438
+    },
+    {
+      "epoch": 0.19439,
+      "grad_norm": 1.296743159961658,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 19439
+    },
+    {
+      "epoch": 0.1944,
+      "grad_norm": 1.2561636564981153,
+      "learning_rate": 0.003,
+      "loss": 4.0408,
+      "step": 19440
+    },
+    {
+      "epoch": 0.19441,
+      "grad_norm": 1.0477855132903582,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 19441
+    },
+    {
+      "epoch": 0.19442,
+      "grad_norm": 1.4114023473692943,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 19442
+    },
+    {
+      "epoch": 0.19443,
+      "grad_norm": 1.0783518422238263,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 19443
+    },
+    {
+      "epoch": 0.19444,
+      "grad_norm": 1.452854020825635,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 19444
+    },
+    {
+      "epoch": 0.19445,
+      "grad_norm": 1.034270639276834,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 19445
+    },
+    {
+      "epoch": 0.19446,
+      "grad_norm": 1.4788260212438489,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 19446
+    },
+    {
+      "epoch": 0.19447,
+      "grad_norm": 1.172745960623489,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 19447
+    },
+    {
+      "epoch": 0.19448,
+      "grad_norm": 1.456796198625019,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 19448
+    },
+    {
+      "epoch": 0.19449,
+      "grad_norm": 0.968685063971965,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 19449
+    },
+    {
+      "epoch": 0.1945,
+      "grad_norm": 1.1545753508504955,
+      "learning_rate": 0.003,
+      "loss": 3.9931,
+      "step": 19450
+    },
+    {
+      "epoch": 0.19451,
+      "grad_norm": 1.3506025686160945,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 19451
+    },
+    {
+      "epoch": 0.19452,
+      "grad_norm": 1.0894548589385062,
+      "learning_rate": 0.003,
+      "loss": 3.9826,
+      "step": 19452
+    },
+    {
+      "epoch": 0.19453,
+      "grad_norm": 1.417194799818832,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 19453
+    },
+    {
+      "epoch": 0.19454,
+      "grad_norm": 1.1388475162873184,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 19454
+    },
+    {
+      "epoch": 0.19455,
+      "grad_norm": 1.4668464819049523,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 19455
+    },
+    {
+      "epoch": 0.19456,
+      "grad_norm": 1.4128658565436514,
+      "learning_rate": 0.003,
+      "loss": 4.0469,
+      "step": 19456
+    },
+    {
+      "epoch": 0.19457,
+      "grad_norm": 1.288144338755479,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 19457
+    },
+    {
+      "epoch": 0.19458,
+      "grad_norm": 1.0450096230118142,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 19458
+    },
+    {
+      "epoch": 0.19459,
+      "grad_norm": 1.2906418592733522,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 19459
+    },
+    {
+      "epoch": 0.1946,
+      "grad_norm": 1.283352834689703,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 19460
+    },
+    {
+      "epoch": 0.19461,
+      "grad_norm": 1.1838378075547533,
+      "learning_rate": 0.003,
+      "loss": 4.0574,
+      "step": 19461
+    },
+    {
+      "epoch": 0.19462,
+      "grad_norm": 1.426856749707794,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 19462
+    },
+    {
+      "epoch": 0.19463,
+      "grad_norm": 1.1978343329083327,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 19463
+    },
+    {
+      "epoch": 0.19464,
+      "grad_norm": 1.2623076086111769,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 19464
+    },
+    {
+      "epoch": 0.19465,
+      "grad_norm": 1.1432289384815764,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 19465
+    },
+    {
+      "epoch": 0.19466,
+      "grad_norm": 1.4447116352089309,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 19466
+    },
+    {
+      "epoch": 0.19467,
+      "grad_norm": 1.0772318434853418,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 19467
+    },
+    {
+      "epoch": 0.19468,
+      "grad_norm": 1.417077676729842,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 19468
+    },
+    {
+      "epoch": 0.19469,
+      "grad_norm": 1.3498797179824242,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 19469
+    },
+    {
+      "epoch": 0.1947,
+      "grad_norm": 1.2332786951762587,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 19470
+    },
+    {
+      "epoch": 0.19471,
+      "grad_norm": 1.2910709164082321,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 19471
+    },
+    {
+      "epoch": 0.19472,
+      "grad_norm": 1.1950076786036183,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 19472
+    },
+    {
+      "epoch": 0.19473,
+      "grad_norm": 1.6243832865525263,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 19473
+    },
+    {
+      "epoch": 0.19474,
+      "grad_norm": 1.199166522064954,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 19474
+    },
+    {
+      "epoch": 0.19475,
+      "grad_norm": 1.3810708306150938,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 19475
+    },
+    {
+      "epoch": 0.19476,
+      "grad_norm": 1.0789119449425273,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 19476
+    },
+    {
+      "epoch": 0.19477,
+      "grad_norm": 1.4210291483836852,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 19477
+    },
+    {
+      "epoch": 0.19478,
+      "grad_norm": 1.1774463036805667,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 19478
+    },
+    {
+      "epoch": 0.19479,
+      "grad_norm": 1.2760203007419755,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 19479
+    },
+    {
+      "epoch": 0.1948,
+      "grad_norm": 1.272221077500427,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 19480
+    },
+    {
+      "epoch": 0.19481,
+      "grad_norm": 1.4831111201304013,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 19481
+    },
+    {
+      "epoch": 0.19482,
+      "grad_norm": 1.231924637888509,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 19482
+    },
+    {
+      "epoch": 0.19483,
+      "grad_norm": 1.2682120665371375,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 19483
+    },
+    {
+      "epoch": 0.19484,
+      "grad_norm": 1.091269387716183,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 19484
+    },
+    {
+      "epoch": 0.19485,
+      "grad_norm": 1.5753156246444868,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 19485
+    },
+    {
+      "epoch": 0.19486,
+      "grad_norm": 1.2367684483937205,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 19486
+    },
+    {
+      "epoch": 0.19487,
+      "grad_norm": 1.5188014123349969,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 19487
+    },
+    {
+      "epoch": 0.19488,
+      "grad_norm": 1.3107252189477876,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 19488
+    },
+    {
+      "epoch": 0.19489,
+      "grad_norm": 1.312658990254798,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 19489
+    },
+    {
+      "epoch": 0.1949,
+      "grad_norm": 1.2109782661315618,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 19490
+    },
+    {
+      "epoch": 0.19491,
+      "grad_norm": 1.2817543758987004,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 19491
+    },
+    {
+      "epoch": 0.19492,
+      "grad_norm": 1.0785274848407513,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 19492
+    },
+    {
+      "epoch": 0.19493,
+      "grad_norm": 1.49230015973104,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 19493
+    },
+    {
+      "epoch": 0.19494,
+      "grad_norm": 1.1274168817705166,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 19494
+    },
+    {
+      "epoch": 0.19495,
+      "grad_norm": 1.294063969812743,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 19495
+    },
+    {
+      "epoch": 0.19496,
+      "grad_norm": 1.1613531675105446,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 19496
+    },
+    {
+      "epoch": 0.19497,
+      "grad_norm": 1.4862255481330564,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 19497
+    },
+    {
+      "epoch": 0.19498,
+      "grad_norm": 0.9498358646092864,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 19498
+    },
+    {
+      "epoch": 0.19499,
+      "grad_norm": 1.3562965438540844,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 19499
+    },
+    {
+      "epoch": 0.195,
+      "grad_norm": 1.5507913360404775,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 19500
+    },
+    {
+      "epoch": 0.19501,
+      "grad_norm": 1.183882132984025,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 19501
+    },
+    {
+      "epoch": 0.19502,
+      "grad_norm": 1.2141130634117046,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 19502
+    },
+    {
+      "epoch": 0.19503,
+      "grad_norm": 1.2572723183943078,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 19503
+    },
+    {
+      "epoch": 0.19504,
+      "grad_norm": 1.2292496953786687,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 19504
+    },
+    {
+      "epoch": 0.19505,
+      "grad_norm": 1.2718207352592203,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 19505
+    },
+    {
+      "epoch": 0.19506,
+      "grad_norm": 1.3579420087373197,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 19506
+    },
+    {
+      "epoch": 0.19507,
+      "grad_norm": 1.3923571093801126,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 19507
+    },
+    {
+      "epoch": 0.19508,
+      "grad_norm": 1.3084970028161689,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 19508
+    },
+    {
+      "epoch": 0.19509,
+      "grad_norm": 1.1574123956711007,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 19509
+    },
+    {
+      "epoch": 0.1951,
+      "grad_norm": 1.0908937677264265,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 19510
+    },
+    {
+      "epoch": 0.19511,
+      "grad_norm": 1.4820847363616818,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 19511
+    },
+    {
+      "epoch": 0.19512,
+      "grad_norm": 1.1462973133902605,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 19512
+    },
+    {
+      "epoch": 0.19513,
+      "grad_norm": 1.4076582536134032,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 19513
+    },
+    {
+      "epoch": 0.19514,
+      "grad_norm": 1.23218979726053,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 19514
+    },
+    {
+      "epoch": 0.19515,
+      "grad_norm": 1.2492087346730791,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 19515
+    },
+    {
+      "epoch": 0.19516,
+      "grad_norm": 1.5677034476508886,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 19516
+    },
+    {
+      "epoch": 0.19517,
+      "grad_norm": 1.2090457606486258,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 19517
+    },
+    {
+      "epoch": 0.19518,
+      "grad_norm": 1.4422839115762187,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 19518
+    },
+    {
+      "epoch": 0.19519,
+      "grad_norm": 1.439118242744385,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 19519
+    },
+    {
+      "epoch": 0.1952,
+      "grad_norm": 1.0080150453423273,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 19520
+    },
+    {
+      "epoch": 0.19521,
+      "grad_norm": 1.4127806825839508,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 19521
+    },
+    {
+      "epoch": 0.19522,
+      "grad_norm": 1.17244703093169,
+      "learning_rate": 0.003,
+      "loss": 4.0398,
+      "step": 19522
+    },
+    {
+      "epoch": 0.19523,
+      "grad_norm": 1.4409740804434454,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 19523
+    },
+    {
+      "epoch": 0.19524,
+      "grad_norm": 1.1664189115774244,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 19524
+    },
+    {
+      "epoch": 0.19525,
+      "grad_norm": 1.6874770421735905,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 19525
+    },
+    {
+      "epoch": 0.19526,
+      "grad_norm": 1.1236396269361812,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 19526
+    },
+    {
+      "epoch": 0.19527,
+      "grad_norm": 1.3889264447743475,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 19527
+    },
+    {
+      "epoch": 0.19528,
+      "grad_norm": 1.4123307347374694,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 19528
+    },
+    {
+      "epoch": 0.19529,
+      "grad_norm": 1.1181524243471284,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 19529
+    },
+    {
+      "epoch": 0.1953,
+      "grad_norm": 1.3385918290292969,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 19530
+    },
+    {
+      "epoch": 0.19531,
+      "grad_norm": 1.143360306045542,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 19531
+    },
+    {
+      "epoch": 0.19532,
+      "grad_norm": 1.336237949476093,
+      "learning_rate": 0.003,
+      "loss": 4.052,
+      "step": 19532
+    },
+    {
+      "epoch": 0.19533,
+      "grad_norm": 1.3034818995577,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 19533
+    },
+    {
+      "epoch": 0.19534,
+      "grad_norm": 1.2485436158887926,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 19534
+    },
+    {
+      "epoch": 0.19535,
+      "grad_norm": 1.2178827255891311,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 19535
+    },
+    {
+      "epoch": 0.19536,
+      "grad_norm": 1.3523165354257414,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 19536
+    },
+    {
+      "epoch": 0.19537,
+      "grad_norm": 1.1954964202285772,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 19537
+    },
+    {
+      "epoch": 0.19538,
+      "grad_norm": 1.1470012479877092,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 19538
+    },
+    {
+      "epoch": 0.19539,
+      "grad_norm": 1.11420642932526,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 19539
+    },
+    {
+      "epoch": 0.1954,
+      "grad_norm": 1.35877630360642,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 19540
+    },
+    {
+      "epoch": 0.19541,
+      "grad_norm": 1.389067571359014,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 19541
+    },
+    {
+      "epoch": 0.19542,
+      "grad_norm": 1.141529457657941,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 19542
+    },
+    {
+      "epoch": 0.19543,
+      "grad_norm": 1.6344048750222944,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 19543
+    },
+    {
+      "epoch": 0.19544,
+      "grad_norm": 1.2425178251490452,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 19544
+    },
+    {
+      "epoch": 0.19545,
+      "grad_norm": 1.4377230036618,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 19545
+    },
+    {
+      "epoch": 0.19546,
+      "grad_norm": 1.02911826421773,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 19546
+    },
+    {
+      "epoch": 0.19547,
+      "grad_norm": 1.277757486896775,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 19547
+    },
+    {
+      "epoch": 0.19548,
+      "grad_norm": 1.1492132696514115,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 19548
+    },
+    {
+      "epoch": 0.19549,
+      "grad_norm": 1.4221840697860018,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 19549
+    },
+    {
+      "epoch": 0.1955,
+      "grad_norm": 1.3483323187017007,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 19550
+    },
+    {
+      "epoch": 0.19551,
+      "grad_norm": 1.3223846988932462,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 19551
+    },
+    {
+      "epoch": 0.19552,
+      "grad_norm": 1.2219483120443844,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 19552
+    },
+    {
+      "epoch": 0.19553,
+      "grad_norm": 1.187620882759245,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 19553
+    },
+    {
+      "epoch": 0.19554,
+      "grad_norm": 1.3828814961961264,
+      "learning_rate": 0.003,
+      "loss": 3.9743,
+      "step": 19554
+    },
+    {
+      "epoch": 0.19555,
+      "grad_norm": 1.3160227267020888,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 19555
+    },
+    {
+      "epoch": 0.19556,
+      "grad_norm": 1.5348112101779594,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 19556
+    },
+    {
+      "epoch": 0.19557,
+      "grad_norm": 1.0915727764087106,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 19557
+    },
+    {
+      "epoch": 0.19558,
+      "grad_norm": 1.478346273700097,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 19558
+    },
+    {
+      "epoch": 0.19559,
+      "grad_norm": 1.0891766669740826,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 19559
+    },
+    {
+      "epoch": 0.1956,
+      "grad_norm": 1.352084652289617,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 19560
+    },
+    {
+      "epoch": 0.19561,
+      "grad_norm": 1.339878003092702,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 19561
+    },
+    {
+      "epoch": 0.19562,
+      "grad_norm": 1.4612262118494712,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 19562
+    },
+    {
+      "epoch": 0.19563,
+      "grad_norm": 1.0957119063328127,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 19563
+    },
+    {
+      "epoch": 0.19564,
+      "grad_norm": 1.391554862001127,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 19564
+    },
+    {
+      "epoch": 0.19565,
+      "grad_norm": 1.16210771029558,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 19565
+    },
+    {
+      "epoch": 0.19566,
+      "grad_norm": 1.5541605775432796,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 19566
+    },
+    {
+      "epoch": 0.19567,
+      "grad_norm": 0.9834935966276703,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 19567
+    },
+    {
+      "epoch": 0.19568,
+      "grad_norm": 1.396728948759227,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 19568
+    },
+    {
+      "epoch": 0.19569,
+      "grad_norm": 1.190996160721869,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 19569
+    },
+    {
+      "epoch": 0.1957,
+      "grad_norm": 1.2895271476321848,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 19570
+    },
+    {
+      "epoch": 0.19571,
+      "grad_norm": 1.2245583691103334,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 19571
+    },
+    {
+      "epoch": 0.19572,
+      "grad_norm": 1.443414821983181,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 19572
+    },
+    {
+      "epoch": 0.19573,
+      "grad_norm": 1.10713646564787,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 19573
+    },
+    {
+      "epoch": 0.19574,
+      "grad_norm": 1.6154382685828403,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 19574
+    },
+    {
+      "epoch": 0.19575,
+      "grad_norm": 0.8588266587571094,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 19575
+    },
+    {
+      "epoch": 0.19576,
+      "grad_norm": 1.3599440401283418,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 19576
+    },
+    {
+      "epoch": 0.19577,
+      "grad_norm": 1.0761054891917938,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 19577
+    },
+    {
+      "epoch": 0.19578,
+      "grad_norm": 1.546996170628607,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 19578
+    },
+    {
+      "epoch": 0.19579,
+      "grad_norm": 1.1745159544637447,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 19579
+    },
+    {
+      "epoch": 0.1958,
+      "grad_norm": 1.4089864607124443,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 19580
+    },
+    {
+      "epoch": 0.19581,
+      "grad_norm": 1.3387280635367134,
+      "learning_rate": 0.003,
+      "loss": 3.9733,
+      "step": 19581
+    },
+    {
+      "epoch": 0.19582,
+      "grad_norm": 1.239627093187696,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 19582
+    },
+    {
+      "epoch": 0.19583,
+      "grad_norm": 1.2503241327084642,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 19583
+    },
+    {
+      "epoch": 0.19584,
+      "grad_norm": 1.3583129441806734,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 19584
+    },
+    {
+      "epoch": 0.19585,
+      "grad_norm": 1.2481769830561873,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 19585
+    },
+    {
+      "epoch": 0.19586,
+      "grad_norm": 1.230808044125073,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 19586
+    },
+    {
+      "epoch": 0.19587,
+      "grad_norm": 1.1961512576689082,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 19587
+    },
+    {
+      "epoch": 0.19588,
+      "grad_norm": 1.2852322339234126,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 19588
+    },
+    {
+      "epoch": 0.19589,
+      "grad_norm": 1.221811645449711,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 19589
+    },
+    {
+      "epoch": 0.1959,
+      "grad_norm": 1.0076240783056,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 19590
+    },
+    {
+      "epoch": 0.19591,
+      "grad_norm": 1.4999150829988763,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 19591
+    },
+    {
+      "epoch": 0.19592,
+      "grad_norm": 1.0640297851987874,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 19592
+    },
+    {
+      "epoch": 0.19593,
+      "grad_norm": 1.7607148010155502,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 19593
+    },
+    {
+      "epoch": 0.19594,
+      "grad_norm": 1.0672595416634916,
+      "learning_rate": 0.003,
+      "loss": 4.0344,
+      "step": 19594
+    },
+    {
+      "epoch": 0.19595,
+      "grad_norm": 1.5094645316895097,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 19595
+    },
+    {
+      "epoch": 0.19596,
+      "grad_norm": 1.1542637171421024,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 19596
+    },
+    {
+      "epoch": 0.19597,
+      "grad_norm": 1.4817112348416852,
+      "learning_rate": 0.003,
+      "loss": 4.0433,
+      "step": 19597
+    },
+    {
+      "epoch": 0.19598,
+      "grad_norm": 1.5229437193511899,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 19598
+    },
+    {
+      "epoch": 0.19599,
+      "grad_norm": 1.4775125288132893,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 19599
+    },
+    {
+      "epoch": 0.196,
+      "grad_norm": 1.4068227524673214,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 19600
+    },
+    {
+      "epoch": 0.19601,
+      "grad_norm": 1.0457705201931375,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 19601
+    },
+    {
+      "epoch": 0.19602,
+      "grad_norm": 1.438750150083911,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 19602
+    },
+    {
+      "epoch": 0.19603,
+      "grad_norm": 1.1199358294588768,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 19603
+    },
+    {
+      "epoch": 0.19604,
+      "grad_norm": 1.331506856494912,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 19604
+    },
+    {
+      "epoch": 0.19605,
+      "grad_norm": 0.9161761035355457,
+      "learning_rate": 0.003,
+      "loss": 3.973,
+      "step": 19605
+    },
+    {
+      "epoch": 0.19606,
+      "grad_norm": 1.1850879646854842,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 19606
+    },
+    {
+      "epoch": 0.19607,
+      "grad_norm": 1.3699921119302942,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 19607
+    },
+    {
+      "epoch": 0.19608,
+      "grad_norm": 1.1012663225800385,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 19608
+    },
+    {
+      "epoch": 0.19609,
+      "grad_norm": 1.4283681482485215,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 19609
+    },
+    {
+      "epoch": 0.1961,
+      "grad_norm": 1.1995755266403376,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 19610
+    },
+    {
+      "epoch": 0.19611,
+      "grad_norm": 1.449352654340675,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 19611
+    },
+    {
+      "epoch": 0.19612,
+      "grad_norm": 1.5016703062251386,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 19612
+    },
+    {
+      "epoch": 0.19613,
+      "grad_norm": 0.9960996752444851,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 19613
+    },
+    {
+      "epoch": 0.19614,
+      "grad_norm": 1.5799498651019834,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 19614
+    },
+    {
+      "epoch": 0.19615,
+      "grad_norm": 1.1853321445441805,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 19615
+    },
+    {
+      "epoch": 0.19616,
+      "grad_norm": 1.3315549252697563,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 19616
+    },
+    {
+      "epoch": 0.19617,
+      "grad_norm": 1.2307249576946644,
+      "learning_rate": 0.003,
+      "loss": 4.042,
+      "step": 19617
+    },
+    {
+      "epoch": 0.19618,
+      "grad_norm": 1.429323889473967,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 19618
+    },
+    {
+      "epoch": 0.19619,
+      "grad_norm": 1.1816190761169785,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 19619
+    },
+    {
+      "epoch": 0.1962,
+      "grad_norm": 1.3978859567182818,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 19620
+    },
+    {
+      "epoch": 0.19621,
+      "grad_norm": 1.1054501006023867,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 19621
+    },
+    {
+      "epoch": 0.19622,
+      "grad_norm": 1.4621564700284058,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 19622
+    },
+    {
+      "epoch": 0.19623,
+      "grad_norm": 1.1942028969171785,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 19623
+    },
+    {
+      "epoch": 0.19624,
+      "grad_norm": 1.4863189336805593,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 19624
+    },
+    {
+      "epoch": 0.19625,
+      "grad_norm": 1.1812195266751346,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 19625
+    },
+    {
+      "epoch": 0.19626,
+      "grad_norm": 1.3345435614808887,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 19626
+    },
+    {
+      "epoch": 0.19627,
+      "grad_norm": 1.1802296947128674,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 19627
+    },
+    {
+      "epoch": 0.19628,
+      "grad_norm": 1.3100913634401963,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 19628
+    },
+    {
+      "epoch": 0.19629,
+      "grad_norm": 1.3195804620991278,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 19629
+    },
+    {
+      "epoch": 0.1963,
+      "grad_norm": 1.3015224117549498,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 19630
+    },
+    {
+      "epoch": 0.19631,
+      "grad_norm": 1.3990786649498785,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 19631
+    },
+    {
+      "epoch": 0.19632,
+      "grad_norm": 1.2705942832047663,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 19632
+    },
+    {
+      "epoch": 0.19633,
+      "grad_norm": 1.3318227756444863,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 19633
+    },
+    {
+      "epoch": 0.19634,
+      "grad_norm": 1.358359097919518,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 19634
+    },
+    {
+      "epoch": 0.19635,
+      "grad_norm": 1.3281336663671188,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 19635
+    },
+    {
+      "epoch": 0.19636,
+      "grad_norm": 1.1148668646915247,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 19636
+    },
+    {
+      "epoch": 0.19637,
+      "grad_norm": 1.3268689508958573,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 19637
+    },
+    {
+      "epoch": 0.19638,
+      "grad_norm": 1.051025589043811,
+      "learning_rate": 0.003,
+      "loss": 3.9533,
+      "step": 19638
+    },
+    {
+      "epoch": 0.19639,
+      "grad_norm": 1.549739495953743,
+      "learning_rate": 0.003,
+      "loss": 4.0487,
+      "step": 19639
+    },
+    {
+      "epoch": 0.1964,
+      "grad_norm": 1.1264379384989271,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 19640
+    },
+    {
+      "epoch": 0.19641,
+      "grad_norm": 1.4563789788275636,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 19641
+    },
+    {
+      "epoch": 0.19642,
+      "grad_norm": 1.0488717460018138,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 19642
+    },
+    {
+      "epoch": 0.19643,
+      "grad_norm": 1.3437090100316245,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 19643
+    },
+    {
+      "epoch": 0.19644,
+      "grad_norm": 1.1006807337081657,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 19644
+    },
+    {
+      "epoch": 0.19645,
+      "grad_norm": 1.3945002548090317,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 19645
+    },
+    {
+      "epoch": 0.19646,
+      "grad_norm": 1.2075426583796376,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 19646
+    },
+    {
+      "epoch": 0.19647,
+      "grad_norm": 1.293295858432891,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 19647
+    },
+    {
+      "epoch": 0.19648,
+      "grad_norm": 1.3098697229272513,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 19648
+    },
+    {
+      "epoch": 0.19649,
+      "grad_norm": 1.0961375387377226,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 19649
+    },
+    {
+      "epoch": 0.1965,
+      "grad_norm": 1.2180821931375414,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 19650
+    },
+    {
+      "epoch": 0.19651,
+      "grad_norm": 1.2338546026092299,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 19651
+    },
+    {
+      "epoch": 0.19652,
+      "grad_norm": 1.2197656372181531,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 19652
+    },
+    {
+      "epoch": 0.19653,
+      "grad_norm": 1.3341198714196665,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 19653
+    },
+    {
+      "epoch": 0.19654,
+      "grad_norm": 1.3754483753191487,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 19654
+    },
+    {
+      "epoch": 0.19655,
+      "grad_norm": 1.544370728447323,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 19655
+    },
+    {
+      "epoch": 0.19656,
+      "grad_norm": 1.2104419322007474,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 19656
+    },
+    {
+      "epoch": 0.19657,
+      "grad_norm": 1.3340675053609385,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 19657
+    },
+    {
+      "epoch": 0.19658,
+      "grad_norm": 1.3802603007206278,
+      "learning_rate": 0.003,
+      "loss": 4.0418,
+      "step": 19658
+    },
+    {
+      "epoch": 0.19659,
+      "grad_norm": 0.9655541069967075,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 19659
+    },
+    {
+      "epoch": 0.1966,
+      "grad_norm": 1.4923395841179794,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 19660
+    },
+    {
+      "epoch": 0.19661,
+      "grad_norm": 1.2288981190118458,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 19661
+    },
+    {
+      "epoch": 0.19662,
+      "grad_norm": 1.502146226676791,
+      "learning_rate": 0.003,
+      "loss": 4.0325,
+      "step": 19662
+    },
+    {
+      "epoch": 0.19663,
+      "grad_norm": 1.0514102968073638,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 19663
+    },
+    {
+      "epoch": 0.19664,
+      "grad_norm": 1.4654220934296756,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 19664
+    },
+    {
+      "epoch": 0.19665,
+      "grad_norm": 1.069241324892344,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 19665
+    },
+    {
+      "epoch": 0.19666,
+      "grad_norm": 1.4486582930881937,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 19666
+    },
+    {
+      "epoch": 0.19667,
+      "grad_norm": 1.0831545760755243,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 19667
+    },
+    {
+      "epoch": 0.19668,
+      "grad_norm": 1.436563941257675,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 19668
+    },
+    {
+      "epoch": 0.19669,
+      "grad_norm": 1.5062672788108056,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 19669
+    },
+    {
+      "epoch": 0.1967,
+      "grad_norm": 1.3128311516230786,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 19670
+    },
+    {
+      "epoch": 0.19671,
+      "grad_norm": 1.1719839769410758,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 19671
+    },
+    {
+      "epoch": 0.19672,
+      "grad_norm": 1.1297658595941962,
+      "learning_rate": 0.003,
+      "loss": 3.983,
+      "step": 19672
+    },
+    {
+      "epoch": 0.19673,
+      "grad_norm": 1.4355563405189087,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 19673
+    },
+    {
+      "epoch": 0.19674,
+      "grad_norm": 1.0567084550528547,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 19674
+    },
+    {
+      "epoch": 0.19675,
+      "grad_norm": 1.3423620611541782,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 19675
+    },
+    {
+      "epoch": 0.19676,
+      "grad_norm": 1.1634078654899764,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 19676
+    },
+    {
+      "epoch": 0.19677,
+      "grad_norm": 1.4573019513365029,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 19677
+    },
+    {
+      "epoch": 0.19678,
+      "grad_norm": 1.1106768424828741,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 19678
+    },
+    {
+      "epoch": 0.19679,
+      "grad_norm": 1.2533908896551933,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 19679
+    },
+    {
+      "epoch": 0.1968,
+      "grad_norm": 1.217016984584878,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 19680
+    },
+    {
+      "epoch": 0.19681,
+      "grad_norm": 1.5785958532488575,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 19681
+    },
+    {
+      "epoch": 0.19682,
+      "grad_norm": 1.0909009150616689,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 19682
+    },
+    {
+      "epoch": 0.19683,
+      "grad_norm": 1.5771301082548819,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 19683
+    },
+    {
+      "epoch": 0.19684,
+      "grad_norm": 1.228636595353188,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 19684
+    },
+    {
+      "epoch": 0.19685,
+      "grad_norm": 1.2151799400934928,
+      "learning_rate": 0.003,
+      "loss": 3.9708,
+      "step": 19685
+    },
+    {
+      "epoch": 0.19686,
+      "grad_norm": 1.498939221090677,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 19686
+    },
+    {
+      "epoch": 0.19687,
+      "grad_norm": 1.1241652149792463,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 19687
+    },
+    {
+      "epoch": 0.19688,
+      "grad_norm": 1.4310767432747384,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 19688
+    },
+    {
+      "epoch": 0.19689,
+      "grad_norm": 1.2238926456348522,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 19689
+    },
+    {
+      "epoch": 0.1969,
+      "grad_norm": 1.4667044525979636,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 19690
+    },
+    {
+      "epoch": 0.19691,
+      "grad_norm": 1.0596642829766192,
+      "learning_rate": 0.003,
+      "loss": 3.9893,
+      "step": 19691
+    },
+    {
+      "epoch": 0.19692,
+      "grad_norm": 1.3099070307399414,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 19692
+    },
+    {
+      "epoch": 0.19693,
+      "grad_norm": 1.2050870302144918,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 19693
+    },
+    {
+      "epoch": 0.19694,
+      "grad_norm": 1.3051230383758814,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 19694
+    },
+    {
+      "epoch": 0.19695,
+      "grad_norm": 1.19873627338944,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 19695
+    },
+    {
+      "epoch": 0.19696,
+      "grad_norm": 1.335301000643218,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 19696
+    },
+    {
+      "epoch": 0.19697,
+      "grad_norm": 1.0525106864958966,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 19697
+    },
+    {
+      "epoch": 0.19698,
+      "grad_norm": 1.2690670053698654,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 19698
+    },
+    {
+      "epoch": 0.19699,
+      "grad_norm": 1.1667388083079688,
+      "learning_rate": 0.003,
+      "loss": 3.9784,
+      "step": 19699
+    },
+    {
+      "epoch": 0.197,
+      "grad_norm": 1.3902853677707654,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 19700
+    },
+    {
+      "epoch": 0.19701,
+      "grad_norm": 1.3054212801520557,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 19701
+    },
+    {
+      "epoch": 0.19702,
+      "grad_norm": 1.0057312698443932,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 19702
+    },
+    {
+      "epoch": 0.19703,
+      "grad_norm": 1.4078413465511643,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 19703
+    },
+    {
+      "epoch": 0.19704,
+      "grad_norm": 1.1415846814421957,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 19704
+    },
+    {
+      "epoch": 0.19705,
+      "grad_norm": 1.5772425361349942,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 19705
+    },
+    {
+      "epoch": 0.19706,
+      "grad_norm": 1.168198686303975,
+      "learning_rate": 0.003,
+      "loss": 3.9745,
+      "step": 19706
+    },
+    {
+      "epoch": 0.19707,
+      "grad_norm": 1.5687239053768094,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 19707
+    },
+    {
+      "epoch": 0.19708,
+      "grad_norm": 1.396968834570952,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 19708
+    },
+    {
+      "epoch": 0.19709,
+      "grad_norm": 1.1717022640531727,
+      "learning_rate": 0.003,
+      "loss": 4.0414,
+      "step": 19709
+    },
+    {
+      "epoch": 0.1971,
+      "grad_norm": 1.3824148125197184,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 19710
+    },
+    {
+      "epoch": 0.19711,
+      "grad_norm": 1.2581546806587176,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 19711
+    },
+    {
+      "epoch": 0.19712,
+      "grad_norm": 1.1889986692506627,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 19712
+    },
+    {
+      "epoch": 0.19713,
+      "grad_norm": 1.373788115379153,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 19713
+    },
+    {
+      "epoch": 0.19714,
+      "grad_norm": 1.1078579990780477,
+      "learning_rate": 0.003,
+      "loss": 3.9761,
+      "step": 19714
+    },
+    {
+      "epoch": 0.19715,
+      "grad_norm": 1.6183362393693175,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 19715
+    },
+    {
+      "epoch": 0.19716,
+      "grad_norm": 1.3729903618430215,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 19716
+    },
+    {
+      "epoch": 0.19717,
+      "grad_norm": 1.4009331087675325,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 19717
+    },
+    {
+      "epoch": 0.19718,
+      "grad_norm": 0.9686668335443502,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 19718
+    },
+    {
+      "epoch": 0.19719,
+      "grad_norm": 1.315650140810695,
+      "learning_rate": 0.003,
+      "loss": 3.9646,
+      "step": 19719
+    },
+    {
+      "epoch": 0.1972,
+      "grad_norm": 1.1046163591875344,
+      "learning_rate": 0.003,
+      "loss": 3.9724,
+      "step": 19720
+    },
+    {
+      "epoch": 0.19721,
+      "grad_norm": 1.3711991789963014,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 19721
+    },
+    {
+      "epoch": 0.19722,
+      "grad_norm": 1.1196374050213858,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 19722
+    },
+    {
+      "epoch": 0.19723,
+      "grad_norm": 1.3906875449946674,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 19723
+    },
+    {
+      "epoch": 0.19724,
+      "grad_norm": 1.3877266384695668,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 19724
+    },
+    {
+      "epoch": 0.19725,
+      "grad_norm": 1.0779669526235471,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 19725
+    },
+    {
+      "epoch": 0.19726,
+      "grad_norm": 1.4568307478920994,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 19726
+    },
+    {
+      "epoch": 0.19727,
+      "grad_norm": 0.9949820751362443,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 19727
+    },
+    {
+      "epoch": 0.19728,
+      "grad_norm": 1.520425469944409,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 19728
+    },
+    {
+      "epoch": 0.19729,
+      "grad_norm": 1.2530391457994432,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 19729
+    },
+    {
+      "epoch": 0.1973,
+      "grad_norm": 1.4172911370366776,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 19730
+    },
+    {
+      "epoch": 0.19731,
+      "grad_norm": 1.3113726894976678,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 19731
+    },
+    {
+      "epoch": 0.19732,
+      "grad_norm": 1.2560314409513915,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 19732
+    },
+    {
+      "epoch": 0.19733,
+      "grad_norm": 1.3348054249116506,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 19733
+    },
+    {
+      "epoch": 0.19734,
+      "grad_norm": 1.4097709381334718,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 19734
+    },
+    {
+      "epoch": 0.19735,
+      "grad_norm": 1.1071202352582918,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 19735
+    },
+    {
+      "epoch": 0.19736,
+      "grad_norm": 1.3179239304648167,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 19736
+    },
+    {
+      "epoch": 0.19737,
+      "grad_norm": 1.2450256904300783,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 19737
+    },
+    {
+      "epoch": 0.19738,
+      "grad_norm": 1.2947671488610253,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 19738
+    },
+    {
+      "epoch": 0.19739,
+      "grad_norm": 1.402401371335116,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 19739
+    },
+    {
+      "epoch": 0.1974,
+      "grad_norm": 1.4593342879624207,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 19740
+    },
+    {
+      "epoch": 0.19741,
+      "grad_norm": 1.3004113201345335,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 19741
+    },
+    {
+      "epoch": 0.19742,
+      "grad_norm": 1.22010355334233,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 19742
+    },
+    {
+      "epoch": 0.19743,
+      "grad_norm": 1.3374302148104331,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 19743
+    },
+    {
+      "epoch": 0.19744,
+      "grad_norm": 1.434411640228916,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 19744
+    },
+    {
+      "epoch": 0.19745,
+      "grad_norm": 1.0662071283113321,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 19745
+    },
+    {
+      "epoch": 0.19746,
+      "grad_norm": 1.3419550070704667,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 19746
+    },
+    {
+      "epoch": 0.19747,
+      "grad_norm": 1.1002303551271284,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 19747
+    },
+    {
+      "epoch": 0.19748,
+      "grad_norm": 1.6190660192363975,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 19748
+    },
+    {
+      "epoch": 0.19749,
+      "grad_norm": 1.1364568126867673,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 19749
+    },
+    {
+      "epoch": 0.1975,
+      "grad_norm": 1.5666824252130285,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 19750
+    },
+    {
+      "epoch": 0.19751,
+      "grad_norm": 1.068959596393331,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 19751
+    },
+    {
+      "epoch": 0.19752,
+      "grad_norm": 1.1833795926952322,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 19752
+    },
+    {
+      "epoch": 0.19753,
+      "grad_norm": 1.345961427843612,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 19753
+    },
+    {
+      "epoch": 0.19754,
+      "grad_norm": 1.2433704664673184,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 19754
+    },
+    {
+      "epoch": 0.19755,
+      "grad_norm": 1.4718043415443065,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 19755
+    },
+    {
+      "epoch": 0.19756,
+      "grad_norm": 1.4154876808187333,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 19756
+    },
+    {
+      "epoch": 0.19757,
+      "grad_norm": 1.29862051326179,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 19757
+    },
+    {
+      "epoch": 0.19758,
+      "grad_norm": 1.0561727245174066,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 19758
+    },
+    {
+      "epoch": 0.19759,
+      "grad_norm": 1.5862824440824188,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 19759
+    },
+    {
+      "epoch": 0.1976,
+      "grad_norm": 0.9619700344844889,
+      "learning_rate": 0.003,
+      "loss": 4.0522,
+      "step": 19760
+    },
+    {
+      "epoch": 0.19761,
+      "grad_norm": 1.4055473677373886,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 19761
+    },
+    {
+      "epoch": 0.19762,
+      "grad_norm": 1.1390642245141878,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 19762
+    },
+    {
+      "epoch": 0.19763,
+      "grad_norm": 1.3596343739148216,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 19763
+    },
+    {
+      "epoch": 0.19764,
+      "grad_norm": 1.2867303890863369,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 19764
+    },
+    {
+      "epoch": 0.19765,
+      "grad_norm": 1.445471166508086,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 19765
+    },
+    {
+      "epoch": 0.19766,
+      "grad_norm": 1.2450351054399715,
+      "learning_rate": 0.003,
+      "loss": 4.045,
+      "step": 19766
+    },
+    {
+      "epoch": 0.19767,
+      "grad_norm": 1.1953604706804337,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 19767
+    },
+    {
+      "epoch": 0.19768,
+      "grad_norm": 1.4080992195858932,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 19768
+    },
+    {
+      "epoch": 0.19769,
+      "grad_norm": 1.3522236017358396,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 19769
+    },
+    {
+      "epoch": 0.1977,
+      "grad_norm": 1.4830586469562648,
+      "learning_rate": 0.003,
+      "loss": 4.0468,
+      "step": 19770
+    },
+    {
+      "epoch": 0.19771,
+      "grad_norm": 1.3615642238244234,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 19771
+    },
+    {
+      "epoch": 0.19772,
+      "grad_norm": 1.0944898688609779,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 19772
+    },
+    {
+      "epoch": 0.19773,
+      "grad_norm": 1.3924128323557636,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 19773
+    },
+    {
+      "epoch": 0.19774,
+      "grad_norm": 0.9974291921166375,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 19774
+    },
+    {
+      "epoch": 0.19775,
+      "grad_norm": 1.5710353468739093,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 19775
+    },
+    {
+      "epoch": 0.19776,
+      "grad_norm": 1.0049435153152277,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 19776
+    },
+    {
+      "epoch": 0.19777,
+      "grad_norm": 1.4986107319436253,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 19777
+    },
+    {
+      "epoch": 0.19778,
+      "grad_norm": 1.113289922822315,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 19778
+    },
+    {
+      "epoch": 0.19779,
+      "grad_norm": 1.5881179883654661,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 19779
+    },
+    {
+      "epoch": 0.1978,
+      "grad_norm": 1.3407453564226892,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 19780
+    },
+    {
+      "epoch": 0.19781,
+      "grad_norm": 1.153278243038539,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 19781
+    },
+    {
+      "epoch": 0.19782,
+      "grad_norm": 1.2399114381953802,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 19782
+    },
+    {
+      "epoch": 0.19783,
+      "grad_norm": 1.2685810404410176,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 19783
+    },
+    {
+      "epoch": 0.19784,
+      "grad_norm": 1.1153915574929145,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 19784
+    },
+    {
+      "epoch": 0.19785,
+      "grad_norm": 1.3493493096133802,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 19785
+    },
+    {
+      "epoch": 0.19786,
+      "grad_norm": 1.4730269498551556,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 19786
+    },
+    {
+      "epoch": 0.19787,
+      "grad_norm": 1.119131137812184,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 19787
+    },
+    {
+      "epoch": 0.19788,
+      "grad_norm": 1.4487121712179094,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 19788
+    },
+    {
+      "epoch": 0.19789,
+      "grad_norm": 1.3570686590605106,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 19789
+    },
+    {
+      "epoch": 0.1979,
+      "grad_norm": 1.2103468288826205,
+      "learning_rate": 0.003,
+      "loss": 3.9791,
+      "step": 19790
+    },
+    {
+      "epoch": 0.19791,
+      "grad_norm": 1.462626491459287,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 19791
+    },
+    {
+      "epoch": 0.19792,
+      "grad_norm": 1.2764421121716867,
+      "learning_rate": 0.003,
+      "loss": 4.0409,
+      "step": 19792
+    },
+    {
+      "epoch": 0.19793,
+      "grad_norm": 1.4071334596388037,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 19793
+    },
+    {
+      "epoch": 0.19794,
+      "grad_norm": 1.2124538019834994,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 19794
+    },
+    {
+      "epoch": 0.19795,
+      "grad_norm": 1.1890995556358788,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 19795
+    },
+    {
+      "epoch": 0.19796,
+      "grad_norm": 1.3079898222991817,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 19796
+    },
+    {
+      "epoch": 0.19797,
+      "grad_norm": 1.104979515146523,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 19797
+    },
+    {
+      "epoch": 0.19798,
+      "grad_norm": 1.3875520492552549,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 19798
+    },
+    {
+      "epoch": 0.19799,
+      "grad_norm": 1.199380206009414,
+      "learning_rate": 0.003,
+      "loss": 3.9654,
+      "step": 19799
+    },
+    {
+      "epoch": 0.198,
+      "grad_norm": 1.4032844438770256,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 19800
+    },
+    {
+      "epoch": 0.19801,
+      "grad_norm": 1.546886984318848,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 19801
+    },
+    {
+      "epoch": 0.19802,
+      "grad_norm": 1.1354085245190393,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 19802
+    },
+    {
+      "epoch": 0.19803,
+      "grad_norm": 1.2404790730112516,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 19803
+    },
+    {
+      "epoch": 0.19804,
+      "grad_norm": 1.008365070009348,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 19804
+    },
+    {
+      "epoch": 0.19805,
+      "grad_norm": 1.2628932558937247,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 19805
+    },
+    {
+      "epoch": 0.19806,
+      "grad_norm": 1.3964097186100302,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 19806
+    },
+    {
+      "epoch": 0.19807,
+      "grad_norm": 1.3239075578065276,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 19807
+    },
+    {
+      "epoch": 0.19808,
+      "grad_norm": 1.032021827127806,
+      "learning_rate": 0.003,
+      "loss": 4.0549,
+      "step": 19808
+    },
+    {
+      "epoch": 0.19809,
+      "grad_norm": 1.4297437315593309,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 19809
+    },
+    {
+      "epoch": 0.1981,
+      "grad_norm": 1.0751040588462952,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 19810
+    },
+    {
+      "epoch": 0.19811,
+      "grad_norm": 1.319129264951117,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 19811
+    },
+    {
+      "epoch": 0.19812,
+      "grad_norm": 1.2437025060882883,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 19812
+    },
+    {
+      "epoch": 0.19813,
+      "grad_norm": 1.3482541217732182,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 19813
+    },
+    {
+      "epoch": 0.19814,
+      "grad_norm": 1.2897947774281204,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 19814
+    },
+    {
+      "epoch": 0.19815,
+      "grad_norm": 1.4141049719480012,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 19815
+    },
+    {
+      "epoch": 0.19816,
+      "grad_norm": 1.1082584286705897,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 19816
+    },
+    {
+      "epoch": 0.19817,
+      "grad_norm": 1.465709555937987,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 19817
+    },
+    {
+      "epoch": 0.19818,
+      "grad_norm": 1.2025427141188985,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 19818
+    },
+    {
+      "epoch": 0.19819,
+      "grad_norm": 1.7289218859723787,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 19819
+    },
+    {
+      "epoch": 0.1982,
+      "grad_norm": 1.059815716228972,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 19820
+    },
+    {
+      "epoch": 0.19821,
+      "grad_norm": 1.3661140252107111,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 19821
+    },
+    {
+      "epoch": 0.19822,
+      "grad_norm": 1.1619339212780349,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 19822
+    },
+    {
+      "epoch": 0.19823,
+      "grad_norm": 1.2345919141522645,
+      "learning_rate": 0.003,
+      "loss": 3.9772,
+      "step": 19823
+    },
+    {
+      "epoch": 0.19824,
+      "grad_norm": 1.1943941087601408,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 19824
+    },
+    {
+      "epoch": 0.19825,
+      "grad_norm": 1.3209228992162947,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 19825
+    },
+    {
+      "epoch": 0.19826,
+      "grad_norm": 1.400626853056868,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 19826
+    },
+    {
+      "epoch": 0.19827,
+      "grad_norm": 1.1838198395248278,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 19827
+    },
+    {
+      "epoch": 0.19828,
+      "grad_norm": 1.4139212015643148,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 19828
+    },
+    {
+      "epoch": 0.19829,
+      "grad_norm": 1.1611027705727395,
+      "learning_rate": 0.003,
+      "loss": 3.9847,
+      "step": 19829
+    },
+    {
+      "epoch": 0.1983,
+      "grad_norm": 1.4802990006639916,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 19830
+    },
+    {
+      "epoch": 0.19831,
+      "grad_norm": 1.1519922294359293,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 19831
+    },
+    {
+      "epoch": 0.19832,
+      "grad_norm": 1.6324002323022744,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 19832
+    },
+    {
+      "epoch": 0.19833,
+      "grad_norm": 1.0663013699844095,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 19833
+    },
+    {
+      "epoch": 0.19834,
+      "grad_norm": 1.4282551215001644,
+      "learning_rate": 0.003,
+      "loss": 3.9863,
+      "step": 19834
+    },
+    {
+      "epoch": 0.19835,
+      "grad_norm": 1.1324964277531966,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 19835
+    },
+    {
+      "epoch": 0.19836,
+      "grad_norm": 1.3709562050334312,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 19836
+    },
+    {
+      "epoch": 0.19837,
+      "grad_norm": 1.0940404076180703,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 19837
+    },
+    {
+      "epoch": 0.19838,
+      "grad_norm": 1.3847674516720958,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 19838
+    },
+    {
+      "epoch": 0.19839,
+      "grad_norm": 1.1291151502493115,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 19839
+    },
+    {
+      "epoch": 0.1984,
+      "grad_norm": 1.3777051185906417,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 19840
+    },
+    {
+      "epoch": 0.19841,
+      "grad_norm": 1.0919508755929583,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 19841
+    },
+    {
+      "epoch": 0.19842,
+      "grad_norm": 1.348881062278302,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 19842
+    },
+    {
+      "epoch": 0.19843,
+      "grad_norm": 1.2418211304593945,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 19843
+    },
+    {
+      "epoch": 0.19844,
+      "grad_norm": 1.3728476453222995,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 19844
+    },
+    {
+      "epoch": 0.19845,
+      "grad_norm": 1.2269618919614198,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 19845
+    },
+    {
+      "epoch": 0.19846,
+      "grad_norm": 1.1871889776753077,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 19846
+    },
+    {
+      "epoch": 0.19847,
+      "grad_norm": 1.2001224886804518,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 19847
+    },
+    {
+      "epoch": 0.19848,
+      "grad_norm": 1.5625985536350755,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 19848
+    },
+    {
+      "epoch": 0.19849,
+      "grad_norm": 1.292579711151962,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 19849
+    },
+    {
+      "epoch": 0.1985,
+      "grad_norm": 1.3145883717714808,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 19850
+    },
+    {
+      "epoch": 0.19851,
+      "grad_norm": 1.3274648000379654,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 19851
+    },
+    {
+      "epoch": 0.19852,
+      "grad_norm": 1.265246028849256,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 19852
+    },
+    {
+      "epoch": 0.19853,
+      "grad_norm": 1.375749702846153,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 19853
+    },
+    {
+      "epoch": 0.19854,
+      "grad_norm": 1.115377590562821,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 19854
+    },
+    {
+      "epoch": 0.19855,
+      "grad_norm": 1.2386360086935877,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 19855
+    },
+    {
+      "epoch": 0.19856,
+      "grad_norm": 1.3153541975749203,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 19856
+    },
+    {
+      "epoch": 0.19857,
+      "grad_norm": 1.415213998189166,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 19857
+    },
+    {
+      "epoch": 0.19858,
+      "grad_norm": 1.150890878628086,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 19858
+    },
+    {
+      "epoch": 0.19859,
+      "grad_norm": 1.2589607683679,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 19859
+    },
+    {
+      "epoch": 0.1986,
+      "grad_norm": 1.3003198481543805,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 19860
+    },
+    {
+      "epoch": 0.19861,
+      "grad_norm": 1.3075287213165774,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 19861
+    },
+    {
+      "epoch": 0.19862,
+      "grad_norm": 1.2372342463276393,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 19862
+    },
+    {
+      "epoch": 0.19863,
+      "grad_norm": 1.2074841014390805,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 19863
+    },
+    {
+      "epoch": 0.19864,
+      "grad_norm": 1.3158510489112867,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 19864
+    },
+    {
+      "epoch": 0.19865,
+      "grad_norm": 1.266258313126113,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 19865
+    },
+    {
+      "epoch": 0.19866,
+      "grad_norm": 1.3092180528508726,
+      "learning_rate": 0.003,
+      "loss": 3.967,
+      "step": 19866
+    },
+    {
+      "epoch": 0.19867,
+      "grad_norm": 1.3898364795834728,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 19867
+    },
+    {
+      "epoch": 0.19868,
+      "grad_norm": 1.223230386165742,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 19868
+    },
+    {
+      "epoch": 0.19869,
+      "grad_norm": 1.1343795821090121,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 19869
+    },
+    {
+      "epoch": 0.1987,
+      "grad_norm": 1.2313783666454252,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 19870
+    },
+    {
+      "epoch": 0.19871,
+      "grad_norm": 1.1124810180891331,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 19871
+    },
+    {
+      "epoch": 0.19872,
+      "grad_norm": 1.122986648539543,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 19872
+    },
+    {
+      "epoch": 0.19873,
+      "grad_norm": 1.4957523759581952,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 19873
+    },
+    {
+      "epoch": 0.19874,
+      "grad_norm": 1.2538678226409337,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 19874
+    },
+    {
+      "epoch": 0.19875,
+      "grad_norm": 1.3763175903101053,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 19875
+    },
+    {
+      "epoch": 0.19876,
+      "grad_norm": 1.3769147680473421,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 19876
+    },
+    {
+      "epoch": 0.19877,
+      "grad_norm": 1.240206570751857,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 19877
+    },
+    {
+      "epoch": 0.19878,
+      "grad_norm": 1.4923959449465398,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 19878
+    },
+    {
+      "epoch": 0.19879,
+      "grad_norm": 1.4426497078834775,
+      "learning_rate": 0.003,
+      "loss": 3.975,
+      "step": 19879
+    },
+    {
+      "epoch": 0.1988,
+      "grad_norm": 1.5326782513880812,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 19880
+    },
+    {
+      "epoch": 0.19881,
+      "grad_norm": 1.1198484349384787,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 19881
+    },
+    {
+      "epoch": 0.19882,
+      "grad_norm": 1.4758766797996754,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 19882
+    },
+    {
+      "epoch": 0.19883,
+      "grad_norm": 1.3020158184335777,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 19883
+    },
+    {
+      "epoch": 0.19884,
+      "grad_norm": 1.3738726300410613,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 19884
+    },
+    {
+      "epoch": 0.19885,
+      "grad_norm": 1.2538233171835351,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 19885
+    },
+    {
+      "epoch": 0.19886,
+      "grad_norm": 1.2578411688474234,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 19886
+    },
+    {
+      "epoch": 0.19887,
+      "grad_norm": 1.356752364377474,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 19887
+    },
+    {
+      "epoch": 0.19888,
+      "grad_norm": 1.540066905196912,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 19888
+    },
+    {
+      "epoch": 0.19889,
+      "grad_norm": 1.2019991754918868,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 19889
+    },
+    {
+      "epoch": 0.1989,
+      "grad_norm": 1.2585274105606052,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 19890
+    },
+    {
+      "epoch": 0.19891,
+      "grad_norm": 1.1130643779284155,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 19891
+    },
+    {
+      "epoch": 0.19892,
+      "grad_norm": 1.346197602965055,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 19892
+    },
+    {
+      "epoch": 0.19893,
+      "grad_norm": 1.3142039541052513,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 19893
+    },
+    {
+      "epoch": 0.19894,
+      "grad_norm": 1.4499935452412276,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 19894
+    },
+    {
+      "epoch": 0.19895,
+      "grad_norm": 1.4097271225315005,
+      "learning_rate": 0.003,
+      "loss": 4.038,
+      "step": 19895
+    },
+    {
+      "epoch": 0.19896,
+      "grad_norm": 1.278539844921079,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 19896
+    },
+    {
+      "epoch": 0.19897,
+      "grad_norm": 1.4983820941940884,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 19897
+    },
+    {
+      "epoch": 0.19898,
+      "grad_norm": 1.3140971611681886,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 19898
+    },
+    {
+      "epoch": 0.19899,
+      "grad_norm": 1.2677827863744955,
+      "learning_rate": 0.003,
+      "loss": 3.9779,
+      "step": 19899
+    },
+    {
+      "epoch": 0.199,
+      "grad_norm": 1.4003077985378822,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 19900
+    },
+    {
+      "epoch": 0.19901,
+      "grad_norm": 1.2017085916412145,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 19901
+    },
+    {
+      "epoch": 0.19902,
+      "grad_norm": 1.4066261250854917,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 19902
+    },
+    {
+      "epoch": 0.19903,
+      "grad_norm": 1.149929211457563,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 19903
+    },
+    {
+      "epoch": 0.19904,
+      "grad_norm": 1.4021669503030783,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 19904
+    },
+    {
+      "epoch": 0.19905,
+      "grad_norm": 1.3332020748358346,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 19905
+    },
+    {
+      "epoch": 0.19906,
+      "grad_norm": 1.1980970172011012,
+      "learning_rate": 0.003,
+      "loss": 3.9863,
+      "step": 19906
+    },
+    {
+      "epoch": 0.19907,
+      "grad_norm": 1.5499950321433702,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 19907
+    },
+    {
+      "epoch": 0.19908,
+      "grad_norm": 1.3335967522562018,
+      "learning_rate": 0.003,
+      "loss": 3.9668,
+      "step": 19908
+    },
+    {
+      "epoch": 0.19909,
+      "grad_norm": 1.3178438098368022,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 19909
+    },
+    {
+      "epoch": 0.1991,
+      "grad_norm": 1.1461851507997267,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 19910
+    },
+    {
+      "epoch": 0.19911,
+      "grad_norm": 1.232874197462962,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 19911
+    },
+    {
+      "epoch": 0.19912,
+      "grad_norm": 1.327651192716843,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 19912
+    },
+    {
+      "epoch": 0.19913,
+      "grad_norm": 1.1418981933750165,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 19913
+    },
+    {
+      "epoch": 0.19914,
+      "grad_norm": 1.5386496412005504,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 19914
+    },
+    {
+      "epoch": 0.19915,
+      "grad_norm": 1.0185695420834662,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 19915
+    },
+    {
+      "epoch": 0.19916,
+      "grad_norm": 1.611532283397339,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 19916
+    },
+    {
+      "epoch": 0.19917,
+      "grad_norm": 1.0257260145852496,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 19917
+    },
+    {
+      "epoch": 0.19918,
+      "grad_norm": 1.5786020005498411,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 19918
+    },
+    {
+      "epoch": 0.19919,
+      "grad_norm": 1.5006642093997518,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 19919
+    },
+    {
+      "epoch": 0.1992,
+      "grad_norm": 1.4091374141645097,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 19920
+    },
+    {
+      "epoch": 0.19921,
+      "grad_norm": 0.9780913462589801,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 19921
+    },
+    {
+      "epoch": 0.19922,
+      "grad_norm": 1.3135862334881405,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 19922
+    },
+    {
+      "epoch": 0.19923,
+      "grad_norm": 1.1272742738674693,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 19923
+    },
+    {
+      "epoch": 0.19924,
+      "grad_norm": 1.4765541385377001,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 19924
+    },
+    {
+      "epoch": 0.19925,
+      "grad_norm": 1.3555426971862763,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 19925
+    },
+    {
+      "epoch": 0.19926,
+      "grad_norm": 1.2239283352193775,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 19926
+    },
+    {
+      "epoch": 0.19927,
+      "grad_norm": 1.1140792333302996,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 19927
+    },
+    {
+      "epoch": 0.19928,
+      "grad_norm": 1.4139228446583747,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 19928
+    },
+    {
+      "epoch": 0.19929,
+      "grad_norm": 1.068354073531398,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 19929
+    },
+    {
+      "epoch": 0.1993,
+      "grad_norm": 1.5084297239368216,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 19930
+    },
+    {
+      "epoch": 0.19931,
+      "grad_norm": 1.2842815142998567,
+      "learning_rate": 0.003,
+      "loss": 4.0492,
+      "step": 19931
+    },
+    {
+      "epoch": 0.19932,
+      "grad_norm": 1.2681605296451006,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 19932
+    },
+    {
+      "epoch": 0.19933,
+      "grad_norm": 1.0668443824768168,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 19933
+    },
+    {
+      "epoch": 0.19934,
+      "grad_norm": 1.347065457082421,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 19934
+    },
+    {
+      "epoch": 0.19935,
+      "grad_norm": 1.2044594506341308,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 19935
+    },
+    {
+      "epoch": 0.19936,
+      "grad_norm": 1.1635541263351354,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 19936
+    },
+    {
+      "epoch": 0.19937,
+      "grad_norm": 1.367447608030504,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 19937
+    },
+    {
+      "epoch": 0.19938,
+      "grad_norm": 1.2758116677340372,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 19938
+    },
+    {
+      "epoch": 0.19939,
+      "grad_norm": 1.1756831882235983,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 19939
+    },
+    {
+      "epoch": 0.1994,
+      "grad_norm": 1.3299796255278968,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 19940
+    },
+    {
+      "epoch": 0.19941,
+      "grad_norm": 1.2884697266558414,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 19941
+    },
+    {
+      "epoch": 0.19942,
+      "grad_norm": 1.1449187545607178,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 19942
+    },
+    {
+      "epoch": 0.19943,
+      "grad_norm": 1.4859591304059343,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 19943
+    },
+    {
+      "epoch": 0.19944,
+      "grad_norm": 1.0957539924182813,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 19944
+    },
+    {
+      "epoch": 0.19945,
+      "grad_norm": 1.3378678776508732,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 19945
+    },
+    {
+      "epoch": 0.19946,
+      "grad_norm": 1.4819931482001178,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 19946
+    },
+    {
+      "epoch": 0.19947,
+      "grad_norm": 1.294077698382347,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 19947
+    },
+    {
+      "epoch": 0.19948,
+      "grad_norm": 1.2265482459437984,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 19948
+    },
+    {
+      "epoch": 0.19949,
+      "grad_norm": 1.4934306418524002,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 19949
+    },
+    {
+      "epoch": 0.1995,
+      "grad_norm": 1.2508703182311427,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 19950
+    },
+    {
+      "epoch": 0.19951,
+      "grad_norm": 1.164793526173201,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 19951
+    },
+    {
+      "epoch": 0.19952,
+      "grad_norm": 1.4860610404809922,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 19952
+    },
+    {
+      "epoch": 0.19953,
+      "grad_norm": 1.255059689304981,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 19953
+    },
+    {
+      "epoch": 0.19954,
+      "grad_norm": 1.2160808296014287,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 19954
+    },
+    {
+      "epoch": 0.19955,
+      "grad_norm": 1.4832942389587467,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 19955
+    },
+    {
+      "epoch": 0.19956,
+      "grad_norm": 1.1506199135947195,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 19956
+    },
+    {
+      "epoch": 0.19957,
+      "grad_norm": 1.23649567328495,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 19957
+    },
+    {
+      "epoch": 0.19958,
+      "grad_norm": 1.3458934293176252,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 19958
+    },
+    {
+      "epoch": 0.19959,
+      "grad_norm": 1.3655953224892161,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 19959
+    },
+    {
+      "epoch": 0.1996,
+      "grad_norm": 1.3044738879594926,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 19960
+    },
+    {
+      "epoch": 0.19961,
+      "grad_norm": 1.3656005856402742,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 19961
+    },
+    {
+      "epoch": 0.19962,
+      "grad_norm": 1.2196436402418755,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 19962
+    },
+    {
+      "epoch": 0.19963,
+      "grad_norm": 1.223435855406788,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 19963
+    },
+    {
+      "epoch": 0.19964,
+      "grad_norm": 1.0966402736545902,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 19964
+    },
+    {
+      "epoch": 0.19965,
+      "grad_norm": 1.3802718574385502,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 19965
+    },
+    {
+      "epoch": 0.19966,
+      "grad_norm": 1.231443033853684,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 19966
+    },
+    {
+      "epoch": 0.19967,
+      "grad_norm": 1.3717819094638526,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 19967
+    },
+    {
+      "epoch": 0.19968,
+      "grad_norm": 1.1266311034223935,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 19968
+    },
+    {
+      "epoch": 0.19969,
+      "grad_norm": 1.2503275015265496,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 19969
+    },
+    {
+      "epoch": 0.1997,
+      "grad_norm": 1.2000810702068685,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 19970
+    },
+    {
+      "epoch": 0.19971,
+      "grad_norm": 1.3187373438292624,
+      "learning_rate": 0.003,
+      "loss": 3.973,
+      "step": 19971
+    },
+    {
+      "epoch": 0.19972,
+      "grad_norm": 1.2474299210918074,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 19972
+    },
+    {
+      "epoch": 0.19973,
+      "grad_norm": 1.5067833226250715,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 19973
+    },
+    {
+      "epoch": 0.19974,
+      "grad_norm": 1.264432323155917,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 19974
+    },
+    {
+      "epoch": 0.19975,
+      "grad_norm": 1.437462646132342,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 19975
+    },
+    {
+      "epoch": 0.19976,
+      "grad_norm": 1.1779257514663124,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 19976
+    },
+    {
+      "epoch": 0.19977,
+      "grad_norm": 1.4933212514645917,
+      "learning_rate": 0.003,
+      "loss": 3.9756,
+      "step": 19977
+    },
+    {
+      "epoch": 0.19978,
+      "grad_norm": 1.1039158051657205,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 19978
+    },
+    {
+      "epoch": 0.19979,
+      "grad_norm": 1.4831708404398949,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 19979
+    },
+    {
+      "epoch": 0.1998,
+      "grad_norm": 1.0937830712886094,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 19980
+    },
+    {
+      "epoch": 0.19981,
+      "grad_norm": 1.3268229958031996,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 19981
+    },
+    {
+      "epoch": 0.19982,
+      "grad_norm": 1.2029460435726707,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 19982
+    },
+    {
+      "epoch": 0.19983,
+      "grad_norm": 1.2607148017431733,
+      "learning_rate": 0.003,
+      "loss": 4.0467,
+      "step": 19983
+    },
+    {
+      "epoch": 0.19984,
+      "grad_norm": 1.176885731672173,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 19984
+    },
+    {
+      "epoch": 0.19985,
+      "grad_norm": 1.355409636225859,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 19985
+    },
+    {
+      "epoch": 0.19986,
+      "grad_norm": 1.2593861683226664,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 19986
+    },
+    {
+      "epoch": 0.19987,
+      "grad_norm": 1.5982713930931183,
+      "learning_rate": 0.003,
+      "loss": 3.9826,
+      "step": 19987
+    },
+    {
+      "epoch": 0.19988,
+      "grad_norm": 1.2088792888274642,
+      "learning_rate": 0.003,
+      "loss": 3.9583,
+      "step": 19988
+    },
+    {
+      "epoch": 0.19989,
+      "grad_norm": 1.3268367608327727,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 19989
+    },
+    {
+      "epoch": 0.1999,
+      "grad_norm": 1.2187431402077036,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 19990
+    },
+    {
+      "epoch": 0.19991,
+      "grad_norm": 1.1412597332587968,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 19991
+    },
+    {
+      "epoch": 0.19992,
+      "grad_norm": 1.3208487753825884,
+      "learning_rate": 0.003,
+      "loss": 3.9872,
+      "step": 19992
+    },
+    {
+      "epoch": 0.19993,
+      "grad_norm": 1.1448332257967233,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 19993
+    },
+    {
+      "epoch": 0.19994,
+      "grad_norm": 1.324365890403094,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 19994
+    },
+    {
+      "epoch": 0.19995,
+      "grad_norm": 1.221510254969645,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 19995
+    },
+    {
+      "epoch": 0.19996,
+      "grad_norm": 1.332054529635232,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 19996
+    },
+    {
+      "epoch": 0.19997,
+      "grad_norm": 1.2621766572128545,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 19997
+    },
+    {
+      "epoch": 0.19998,
+      "grad_norm": 1.4821269222180264,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 19998
+    },
+    {
+      "epoch": 0.19999,
+      "grad_norm": 1.2383755886705985,
+      "learning_rate": 0.003,
+      "loss": 3.988,
+      "step": 19999
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 1.4557591721914918,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 20000
+    },
+    {
+      "epoch": 0.20001,
+      "grad_norm": 1.2872108905424813,
+      "learning_rate": 0.003,
+      "loss": 3.981,
+      "step": 20001
+    },
+    {
+      "epoch": 0.20002,
+      "grad_norm": 1.2053813588690776,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 20002
+    },
+    {
+      "epoch": 0.20003,
+      "grad_norm": 1.4003686859177387,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 20003
+    },
+    {
+      "epoch": 0.20004,
+      "grad_norm": 1.1003969393093953,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 20004
+    },
+    {
+      "epoch": 0.20005,
+      "grad_norm": 1.4342306664500517,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 20005
+    },
+    {
+      "epoch": 0.20006,
+      "grad_norm": 1.2429183425432138,
+      "learning_rate": 0.003,
+      "loss": 3.9722,
+      "step": 20006
+    },
+    {
+      "epoch": 0.20007,
+      "grad_norm": 1.3933561646927375,
+      "learning_rate": 0.003,
+      "loss": 3.9802,
+      "step": 20007
+    },
+    {
+      "epoch": 0.20008,
+      "grad_norm": 1.2390480037790046,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 20008
+    },
+    {
+      "epoch": 0.20009,
+      "grad_norm": 1.3427409829587686,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 20009
+    },
+    {
+      "epoch": 0.2001,
+      "grad_norm": 1.3159038348492624,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 20010
+    },
+    {
+      "epoch": 0.20011,
+      "grad_norm": 1.2899834821602063,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 20011
+    },
+    {
+      "epoch": 0.20012,
+      "grad_norm": 1.1668848801126719,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 20012
+    },
+    {
+      "epoch": 0.20013,
+      "grad_norm": 1.1642712714507102,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 20013
+    },
+    {
+      "epoch": 0.20014,
+      "grad_norm": 1.3006093286438671,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 20014
+    },
+    {
+      "epoch": 0.20015,
+      "grad_norm": 1.1177349605233404,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 20015
+    },
+    {
+      "epoch": 0.20016,
+      "grad_norm": 1.0024671816096333,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 20016
+    },
+    {
+      "epoch": 0.20017,
+      "grad_norm": 1.572375924836335,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 20017
+    },
+    {
+      "epoch": 0.20018,
+      "grad_norm": 1.3004790977845826,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 20018
+    },
+    {
+      "epoch": 0.20019,
+      "grad_norm": 1.481719642055324,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 20019
+    },
+    {
+      "epoch": 0.2002,
+      "grad_norm": 1.1059269512091963,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 20020
+    },
+    {
+      "epoch": 0.20021,
+      "grad_norm": 1.6076807970962637,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 20021
+    },
+    {
+      "epoch": 0.20022,
+      "grad_norm": 1.4272304241824465,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 20022
+    },
+    {
+      "epoch": 0.20023,
+      "grad_norm": 1.178696720969443,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 20023
+    },
+    {
+      "epoch": 0.20024,
+      "grad_norm": 1.394151581191937,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 20024
+    },
+    {
+      "epoch": 0.20025,
+      "grad_norm": 1.4613247298003293,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 20025
+    },
+    {
+      "epoch": 0.20026,
+      "grad_norm": 1.2082960902802726,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 20026
+    },
+    {
+      "epoch": 0.20027,
+      "grad_norm": 1.3249875445929593,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 20027
+    },
+    {
+      "epoch": 0.20028,
+      "grad_norm": 1.1242590630173444,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 20028
+    },
+    {
+      "epoch": 0.20029,
+      "grad_norm": 1.4352881803928175,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 20029
+    },
+    {
+      "epoch": 0.2003,
+      "grad_norm": 1.2221350940695344,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 20030
+    },
+    {
+      "epoch": 0.20031,
+      "grad_norm": 1.2304864519730752,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 20031
+    },
+    {
+      "epoch": 0.20032,
+      "grad_norm": 1.3644401921709184,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 20032
+    },
+    {
+      "epoch": 0.20033,
+      "grad_norm": 1.259897526731243,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 20033
+    },
+    {
+      "epoch": 0.20034,
+      "grad_norm": 1.5434712026957687,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 20034
+    },
+    {
+      "epoch": 0.20035,
+      "grad_norm": 1.2918860847668912,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 20035
+    },
+    {
+      "epoch": 0.20036,
+      "grad_norm": 1.5923452257821784,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 20036
+    },
+    {
+      "epoch": 0.20037,
+      "grad_norm": 1.2648145937080033,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 20037
+    },
+    {
+      "epoch": 0.20038,
+      "grad_norm": 1.2699819487430486,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 20038
+    },
+    {
+      "epoch": 0.20039,
+      "grad_norm": 1.2841294478366938,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 20039
+    },
+    {
+      "epoch": 0.2004,
+      "grad_norm": 1.33632210794439,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 20040
+    },
+    {
+      "epoch": 0.20041,
+      "grad_norm": 1.298100771622154,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 20041
+    },
+    {
+      "epoch": 0.20042,
+      "grad_norm": 1.1849981863708918,
+      "learning_rate": 0.003,
+      "loss": 4.0466,
+      "step": 20042
+    },
+    {
+      "epoch": 0.20043,
+      "grad_norm": 1.2435358469967919,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 20043
+    },
+    {
+      "epoch": 0.20044,
+      "grad_norm": 1.3633863411001568,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 20044
+    },
+    {
+      "epoch": 0.20045,
+      "grad_norm": 1.3442885354413563,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 20045
+    },
+    {
+      "epoch": 0.20046,
+      "grad_norm": 1.5520787810014827,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 20046
+    },
+    {
+      "epoch": 0.20047,
+      "grad_norm": 1.0670731132203308,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 20047
+    },
+    {
+      "epoch": 0.20048,
+      "grad_norm": 1.4191995908315127,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 20048
+    },
+    {
+      "epoch": 0.20049,
+      "grad_norm": 1.1899697103030682,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 20049
+    },
+    {
+      "epoch": 0.2005,
+      "grad_norm": 1.3281054822577516,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 20050
+    },
+    {
+      "epoch": 0.20051,
+      "grad_norm": 1.4123263689267866,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 20051
+    },
+    {
+      "epoch": 0.20052,
+      "grad_norm": 1.344657615520193,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 20052
+    },
+    {
+      "epoch": 0.20053,
+      "grad_norm": 1.1895858299547728,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 20053
+    },
+    {
+      "epoch": 0.20054,
+      "grad_norm": 1.2619835687206515,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 20054
+    },
+    {
+      "epoch": 0.20055,
+      "grad_norm": 1.2723581474773138,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 20055
+    },
+    {
+      "epoch": 0.20056,
+      "grad_norm": 1.3225633007851507,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 20056
+    },
+    {
+      "epoch": 0.20057,
+      "grad_norm": 1.365413629076785,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 20057
+    },
+    {
+      "epoch": 0.20058,
+      "grad_norm": 1.2220705717607125,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 20058
+    },
+    {
+      "epoch": 0.20059,
+      "grad_norm": 1.3245988678828018,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 20059
+    },
+    {
+      "epoch": 0.2006,
+      "grad_norm": 1.1725317711954784,
+      "learning_rate": 0.003,
+      "loss": 3.9815,
+      "step": 20060
+    },
+    {
+      "epoch": 0.20061,
+      "grad_norm": 1.5272780825576795,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 20061
+    },
+    {
+      "epoch": 0.20062,
+      "grad_norm": 1.3282626852957824,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 20062
+    },
+    {
+      "epoch": 0.20063,
+      "grad_norm": 1.5302396317146756,
+      "learning_rate": 0.003,
+      "loss": 4.0471,
+      "step": 20063
+    },
+    {
+      "epoch": 0.20064,
+      "grad_norm": 1.1765431266454116,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 20064
+    },
+    {
+      "epoch": 0.20065,
+      "grad_norm": 1.3309505004290878,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 20065
+    },
+    {
+      "epoch": 0.20066,
+      "grad_norm": 1.2444476656383605,
+      "learning_rate": 0.003,
+      "loss": 3.9748,
+      "step": 20066
+    },
+    {
+      "epoch": 0.20067,
+      "grad_norm": 1.454187621773579,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 20067
+    },
+    {
+      "epoch": 0.20068,
+      "grad_norm": 1.2170824440586727,
+      "learning_rate": 0.003,
+      "loss": 3.9877,
+      "step": 20068
+    },
+    {
+      "epoch": 0.20069,
+      "grad_norm": 1.4814436309680565,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 20069
+    },
+    {
+      "epoch": 0.2007,
+      "grad_norm": 1.2886756920520606,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 20070
+    },
+    {
+      "epoch": 0.20071,
+      "grad_norm": 1.115153442763866,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 20071
+    },
+    {
+      "epoch": 0.20072,
+      "grad_norm": 1.1954032357491189,
+      "learning_rate": 0.003,
+      "loss": 3.961,
+      "step": 20072
+    },
+    {
+      "epoch": 0.20073,
+      "grad_norm": 1.246933428026038,
+      "learning_rate": 0.003,
+      "loss": 3.9696,
+      "step": 20073
+    },
+    {
+      "epoch": 0.20074,
+      "grad_norm": 1.1385288952762787,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 20074
+    },
+    {
+      "epoch": 0.20075,
+      "grad_norm": 1.2424746403434173,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 20075
+    },
+    {
+      "epoch": 0.20076,
+      "grad_norm": 1.1431095852525643,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 20076
+    },
+    {
+      "epoch": 0.20077,
+      "grad_norm": 1.1367546583818051,
+      "learning_rate": 0.003,
+      "loss": 3.9722,
+      "step": 20077
+    },
+    {
+      "epoch": 0.20078,
+      "grad_norm": 1.3415782590391023,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 20078
+    },
+    {
+      "epoch": 0.20079,
+      "grad_norm": 1.0971663948510084,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 20079
+    },
+    {
+      "epoch": 0.2008,
+      "grad_norm": 1.7216741097283537,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 20080
+    },
+    {
+      "epoch": 0.20081,
+      "grad_norm": 1.2054948883065015,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 20081
+    },
+    {
+      "epoch": 0.20082,
+      "grad_norm": 1.3338790861123881,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 20082
+    },
+    {
+      "epoch": 0.20083,
+      "grad_norm": 1.2717671202422345,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 20083
+    },
+    {
+      "epoch": 0.20084,
+      "grad_norm": 1.307149328463756,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 20084
+    },
+    {
+      "epoch": 0.20085,
+      "grad_norm": 1.2979914114623365,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 20085
+    },
+    {
+      "epoch": 0.20086,
+      "grad_norm": 1.4206074068510508,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 20086
+    },
+    {
+      "epoch": 0.20087,
+      "grad_norm": 1.0192147372436002,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 20087
+    },
+    {
+      "epoch": 0.20088,
+      "grad_norm": 1.6587732395138055,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 20088
+    },
+    {
+      "epoch": 0.20089,
+      "grad_norm": 1.0145247831716289,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 20089
+    },
+    {
+      "epoch": 0.2009,
+      "grad_norm": 1.517183870317736,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 20090
+    },
+    {
+      "epoch": 0.20091,
+      "grad_norm": 1.1287122468763793,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 20091
+    },
+    {
+      "epoch": 0.20092,
+      "grad_norm": 1.4065941168792935,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 20092
+    },
+    {
+      "epoch": 0.20093,
+      "grad_norm": 1.5152175609115452,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 20093
+    },
+    {
+      "epoch": 0.20094,
+      "grad_norm": 1.2557890165383672,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 20094
+    },
+    {
+      "epoch": 0.20095,
+      "grad_norm": 1.4765759908029792,
+      "learning_rate": 0.003,
+      "loss": 4.0439,
+      "step": 20095
+    },
+    {
+      "epoch": 0.20096,
+      "grad_norm": 1.4147471159240106,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 20096
+    },
+    {
+      "epoch": 0.20097,
+      "grad_norm": 1.2295943893767531,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 20097
+    },
+    {
+      "epoch": 0.20098,
+      "grad_norm": 1.3818534289791886,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 20098
+    },
+    {
+      "epoch": 0.20099,
+      "grad_norm": 1.109535252396866,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 20099
+    },
+    {
+      "epoch": 0.201,
+      "grad_norm": 1.3466977091540862,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 20100
+    },
+    {
+      "epoch": 0.20101,
+      "grad_norm": 1.1591770302731246,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 20101
+    },
+    {
+      "epoch": 0.20102,
+      "grad_norm": 1.472840707378827,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 20102
+    },
+    {
+      "epoch": 0.20103,
+      "grad_norm": 1.4403623555508618,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 20103
+    },
+    {
+      "epoch": 0.20104,
+      "grad_norm": 1.0350042730914473,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 20104
+    },
+    {
+      "epoch": 0.20105,
+      "grad_norm": 1.393050291190935,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 20105
+    },
+    {
+      "epoch": 0.20106,
+      "grad_norm": 1.149268736097747,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 20106
+    },
+    {
+      "epoch": 0.20107,
+      "grad_norm": 1.4875169346336197,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 20107
+    },
+    {
+      "epoch": 0.20108,
+      "grad_norm": 0.9980571787630449,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 20108
+    },
+    {
+      "epoch": 0.20109,
+      "grad_norm": 1.5526839289477394,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 20109
+    },
+    {
+      "epoch": 0.2011,
+      "grad_norm": 0.9384169542455092,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 20110
+    },
+    {
+      "epoch": 0.20111,
+      "grad_norm": 1.5901805633703026,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 20111
+    },
+    {
+      "epoch": 0.20112,
+      "grad_norm": 1.429652090642894,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 20112
+    },
+    {
+      "epoch": 0.20113,
+      "grad_norm": 1.1906045873467526,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 20113
+    },
+    {
+      "epoch": 0.20114,
+      "grad_norm": 1.4432488779744677,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 20114
+    },
+    {
+      "epoch": 0.20115,
+      "grad_norm": 1.385667914005685,
+      "learning_rate": 0.003,
+      "loss": 4.0527,
+      "step": 20115
+    },
+    {
+      "epoch": 0.20116,
+      "grad_norm": 1.4244009552049495,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 20116
+    },
+    {
+      "epoch": 0.20117,
+      "grad_norm": 1.2827299199486064,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 20117
+    },
+    {
+      "epoch": 0.20118,
+      "grad_norm": 1.4885227076803627,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 20118
+    },
+    {
+      "epoch": 0.20119,
+      "grad_norm": 1.1422224850100746,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 20119
+    },
+    {
+      "epoch": 0.2012,
+      "grad_norm": 1.5110093400210558,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 20120
+    },
+    {
+      "epoch": 0.20121,
+      "grad_norm": 1.1259204309102298,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 20121
+    },
+    {
+      "epoch": 0.20122,
+      "grad_norm": 1.4670824057585605,
+      "learning_rate": 0.003,
+      "loss": 4.0511,
+      "step": 20122
+    },
+    {
+      "epoch": 0.20123,
+      "grad_norm": 1.1807438985917096,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 20123
+    },
+    {
+      "epoch": 0.20124,
+      "grad_norm": 1.2746016742055788,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 20124
+    },
+    {
+      "epoch": 0.20125,
+      "grad_norm": 1.2434507795806846,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 20125
+    },
+    {
+      "epoch": 0.20126,
+      "grad_norm": 1.3278992693902891,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 20126
+    },
+    {
+      "epoch": 0.20127,
+      "grad_norm": 1.2855424228037862,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 20127
+    },
+    {
+      "epoch": 0.20128,
+      "grad_norm": 1.3123046599317134,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 20128
+    },
+    {
+      "epoch": 0.20129,
+      "grad_norm": 1.107544969634977,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 20129
+    },
+    {
+      "epoch": 0.2013,
+      "grad_norm": 1.6826516446291635,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 20130
+    },
+    {
+      "epoch": 0.20131,
+      "grad_norm": 1.257371993334881,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 20131
+    },
+    {
+      "epoch": 0.20132,
+      "grad_norm": 1.4243106118614248,
+      "learning_rate": 0.003,
+      "loss": 4.0326,
+      "step": 20132
+    },
+    {
+      "epoch": 0.20133,
+      "grad_norm": 1.0949220481466881,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 20133
+    },
+    {
+      "epoch": 0.20134,
+      "grad_norm": 1.2920476144233841,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 20134
+    },
+    {
+      "epoch": 0.20135,
+      "grad_norm": 1.135408557241526,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 20135
+    },
+    {
+      "epoch": 0.20136,
+      "grad_norm": 1.3402672333918495,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 20136
+    },
+    {
+      "epoch": 0.20137,
+      "grad_norm": 1.3975384167418234,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 20137
+    },
+    {
+      "epoch": 0.20138,
+      "grad_norm": 1.2811500288043876,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 20138
+    },
+    {
+      "epoch": 0.20139,
+      "grad_norm": 1.2901415308452353,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 20139
+    },
+    {
+      "epoch": 0.2014,
+      "grad_norm": 1.182963869393872,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 20140
+    },
+    {
+      "epoch": 0.20141,
+      "grad_norm": 1.2209959804839656,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 20141
+    },
+    {
+      "epoch": 0.20142,
+      "grad_norm": 1.2332097681123626,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 20142
+    },
+    {
+      "epoch": 0.20143,
+      "grad_norm": 1.0745254812982972,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 20143
+    },
+    {
+      "epoch": 0.20144,
+      "grad_norm": 1.2744526919235004,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 20144
+    },
+    {
+      "epoch": 0.20145,
+      "grad_norm": 1.18420265775978,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 20145
+    },
+    {
+      "epoch": 0.20146,
+      "grad_norm": 1.4372379403279205,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 20146
+    },
+    {
+      "epoch": 0.20147,
+      "grad_norm": 1.3169521933254276,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 20147
+    },
+    {
+      "epoch": 0.20148,
+      "grad_norm": 1.3744205542397776,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 20148
+    },
+    {
+      "epoch": 0.20149,
+      "grad_norm": 1.2093005754059674,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 20149
+    },
+    {
+      "epoch": 0.2015,
+      "grad_norm": 1.25458693690898,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 20150
+    },
+    {
+      "epoch": 0.20151,
+      "grad_norm": 1.1773862214312012,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 20151
+    },
+    {
+      "epoch": 0.20152,
+      "grad_norm": 1.4180178744550276,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 20152
+    },
+    {
+      "epoch": 0.20153,
+      "grad_norm": 1.1778560617955687,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 20153
+    },
+    {
+      "epoch": 0.20154,
+      "grad_norm": 1.1779384188513027,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 20154
+    },
+    {
+      "epoch": 0.20155,
+      "grad_norm": 1.2536376136623886,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 20155
+    },
+    {
+      "epoch": 0.20156,
+      "grad_norm": 1.3728855827149575,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 20156
+    },
+    {
+      "epoch": 0.20157,
+      "grad_norm": 1.343386900313245,
+      "learning_rate": 0.003,
+      "loss": 3.9821,
+      "step": 20157
+    },
+    {
+      "epoch": 0.20158,
+      "grad_norm": 1.3618232156497776,
+      "learning_rate": 0.003,
+      "loss": 3.9893,
+      "step": 20158
+    },
+    {
+      "epoch": 0.20159,
+      "grad_norm": 1.3728458800836976,
+      "learning_rate": 0.003,
+      "loss": 3.9784,
+      "step": 20159
+    },
+    {
+      "epoch": 0.2016,
+      "grad_norm": 1.1283477996506779,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 20160
+    },
+    {
+      "epoch": 0.20161,
+      "grad_norm": 1.584751284728246,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 20161
+    },
+    {
+      "epoch": 0.20162,
+      "grad_norm": 1.1737340645649315,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 20162
+    },
+    {
+      "epoch": 0.20163,
+      "grad_norm": 1.2790645862333698,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 20163
+    },
+    {
+      "epoch": 0.20164,
+      "grad_norm": 1.231813768294353,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 20164
+    },
+    {
+      "epoch": 0.20165,
+      "grad_norm": 1.2454239838143542,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 20165
+    },
+    {
+      "epoch": 0.20166,
+      "grad_norm": 1.3922839597941756,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 20166
+    },
+    {
+      "epoch": 0.20167,
+      "grad_norm": 1.048922364357797,
+      "learning_rate": 0.003,
+      "loss": 3.9788,
+      "step": 20167
+    },
+    {
+      "epoch": 0.20168,
+      "grad_norm": 1.3596808715150543,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 20168
+    },
+    {
+      "epoch": 0.20169,
+      "grad_norm": 1.3290999734916824,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 20169
+    },
+    {
+      "epoch": 0.2017,
+      "grad_norm": 1.3226337338361225,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 20170
+    },
+    {
+      "epoch": 0.20171,
+      "grad_norm": 1.330582325554692,
+      "learning_rate": 0.003,
+      "loss": 4.0337,
+      "step": 20171
+    },
+    {
+      "epoch": 0.20172,
+      "grad_norm": 1.2093047586735512,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 20172
+    },
+    {
+      "epoch": 0.20173,
+      "grad_norm": 1.2833758445854349,
+      "learning_rate": 0.003,
+      "loss": 3.9657,
+      "step": 20173
+    },
+    {
+      "epoch": 0.20174,
+      "grad_norm": 1.2298504324394215,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 20174
+    },
+    {
+      "epoch": 0.20175,
+      "grad_norm": 1.7804306903211693,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 20175
+    },
+    {
+      "epoch": 0.20176,
+      "grad_norm": 0.977935165562048,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 20176
+    },
+    {
+      "epoch": 0.20177,
+      "grad_norm": 1.546406717352743,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 20177
+    },
+    {
+      "epoch": 0.20178,
+      "grad_norm": 0.9882514453495875,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 20178
+    },
+    {
+      "epoch": 0.20179,
+      "grad_norm": 1.7550199528474113,
+      "learning_rate": 0.003,
+      "loss": 4.0294,
+      "step": 20179
+    },
+    {
+      "epoch": 0.2018,
+      "grad_norm": 1.1257262526802088,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 20180
+    },
+    {
+      "epoch": 0.20181,
+      "grad_norm": 1.1523457169554314,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 20181
+    },
+    {
+      "epoch": 0.20182,
+      "grad_norm": 1.4302406555160894,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 20182
+    },
+    {
+      "epoch": 0.20183,
+      "grad_norm": 1.1754735762686452,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 20183
+    },
+    {
+      "epoch": 0.20184,
+      "grad_norm": 1.298474774973355,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 20184
+    },
+    {
+      "epoch": 0.20185,
+      "grad_norm": 1.2844971546398924,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 20185
+    },
+    {
+      "epoch": 0.20186,
+      "grad_norm": 1.3426691354966496,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 20186
+    },
+    {
+      "epoch": 0.20187,
+      "grad_norm": 1.508097314747256,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 20187
+    },
+    {
+      "epoch": 0.20188,
+      "grad_norm": 1.223706888537585,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 20188
+    },
+    {
+      "epoch": 0.20189,
+      "grad_norm": 1.222043157625563,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 20189
+    },
+    {
+      "epoch": 0.2019,
+      "grad_norm": 1.320343981670849,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 20190
+    },
+    {
+      "epoch": 0.20191,
+      "grad_norm": 1.2618735336780729,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 20191
+    },
+    {
+      "epoch": 0.20192,
+      "grad_norm": 1.400327710716091,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 20192
+    },
+    {
+      "epoch": 0.20193,
+      "grad_norm": 1.01807780330107,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 20193
+    },
+    {
+      "epoch": 0.20194,
+      "grad_norm": 1.7488995079542997,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 20194
+    },
+    {
+      "epoch": 0.20195,
+      "grad_norm": 1.1083844011298842,
+      "learning_rate": 0.003,
+      "loss": 4.0648,
+      "step": 20195
+    },
+    {
+      "epoch": 0.20196,
+      "grad_norm": 1.3297383762977133,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 20196
+    },
+    {
+      "epoch": 0.20197,
+      "grad_norm": 1.4447513034206332,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 20197
+    },
+    {
+      "epoch": 0.20198,
+      "grad_norm": 1.3077494918982797,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 20198
+    },
+    {
+      "epoch": 0.20199,
+      "grad_norm": 1.4154278522839727,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 20199
+    },
+    {
+      "epoch": 0.202,
+      "grad_norm": 1.0413482944604868,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 20200
+    },
+    {
+      "epoch": 0.20201,
+      "grad_norm": 1.4383261793649906,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 20201
+    },
+    {
+      "epoch": 0.20202,
+      "grad_norm": 1.268487557721576,
+      "learning_rate": 0.003,
+      "loss": 4.0577,
+      "step": 20202
+    },
+    {
+      "epoch": 0.20203,
+      "grad_norm": 1.5503468949242645,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 20203
+    },
+    {
+      "epoch": 0.20204,
+      "grad_norm": 1.2890936081091573,
+      "learning_rate": 0.003,
+      "loss": 4.036,
+      "step": 20204
+    },
+    {
+      "epoch": 0.20205,
+      "grad_norm": 1.6118174908253122,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 20205
+    },
+    {
+      "epoch": 0.20206,
+      "grad_norm": 1.0932163816214477,
+      "learning_rate": 0.003,
+      "loss": 3.9689,
+      "step": 20206
+    },
+    {
+      "epoch": 0.20207,
+      "grad_norm": 1.1876581306298424,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 20207
+    },
+    {
+      "epoch": 0.20208,
+      "grad_norm": 1.2077265033884137,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 20208
+    },
+    {
+      "epoch": 0.20209,
+      "grad_norm": 1.3403918921503821,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 20209
+    },
+    {
+      "epoch": 0.2021,
+      "grad_norm": 1.290795683684151,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 20210
+    },
+    {
+      "epoch": 0.20211,
+      "grad_norm": 1.299199574309394,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 20211
+    },
+    {
+      "epoch": 0.20212,
+      "grad_norm": 1.23269592933517,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 20212
+    },
+    {
+      "epoch": 0.20213,
+      "grad_norm": 1.5565793866193829,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 20213
+    },
+    {
+      "epoch": 0.20214,
+      "grad_norm": 1.1394728992463181,
+      "learning_rate": 0.003,
+      "loss": 3.97,
+      "step": 20214
+    },
+    {
+      "epoch": 0.20215,
+      "grad_norm": 1.2046820437826478,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 20215
+    },
+    {
+      "epoch": 0.20216,
+      "grad_norm": 1.253587573699531,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 20216
+    },
+    {
+      "epoch": 0.20217,
+      "grad_norm": 1.4559418744607342,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 20217
+    },
+    {
+      "epoch": 0.20218,
+      "grad_norm": 1.1905825696880952,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 20218
+    },
+    {
+      "epoch": 0.20219,
+      "grad_norm": 1.483020268595251,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 20219
+    },
+    {
+      "epoch": 0.2022,
+      "grad_norm": 1.1837160124556272,
+      "learning_rate": 0.003,
+      "loss": 3.976,
+      "step": 20220
+    },
+    {
+      "epoch": 0.20221,
+      "grad_norm": 1.4373127013141753,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 20221
+    },
+    {
+      "epoch": 0.20222,
+      "grad_norm": 1.2052240361209432,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 20222
+    },
+    {
+      "epoch": 0.20223,
+      "grad_norm": 1.239448680306081,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 20223
+    },
+    {
+      "epoch": 0.20224,
+      "grad_norm": 1.1676740118569406,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 20224
+    },
+    {
+      "epoch": 0.20225,
+      "grad_norm": 1.5894936936109902,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 20225
+    },
+    {
+      "epoch": 0.20226,
+      "grad_norm": 1.188104726580713,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 20226
+    },
+    {
+      "epoch": 0.20227,
+      "grad_norm": 1.432307529198231,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 20227
+    },
+    {
+      "epoch": 0.20228,
+      "grad_norm": 1.1506883972373618,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 20228
+    },
+    {
+      "epoch": 0.20229,
+      "grad_norm": 1.6021857878693837,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 20229
+    },
+    {
+      "epoch": 0.2023,
+      "grad_norm": 1.1979385145893622,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 20230
+    },
+    {
+      "epoch": 0.20231,
+      "grad_norm": 1.2796081849203744,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 20231
+    },
+    {
+      "epoch": 0.20232,
+      "grad_norm": 1.2378739449003042,
+      "learning_rate": 0.003,
+      "loss": 3.9782,
+      "step": 20232
+    },
+    {
+      "epoch": 0.20233,
+      "grad_norm": 1.3167193552722471,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 20233
+    },
+    {
+      "epoch": 0.20234,
+      "grad_norm": 1.2269656161617786,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 20234
+    },
+    {
+      "epoch": 0.20235,
+      "grad_norm": 1.1998982242267153,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 20235
+    },
+    {
+      "epoch": 0.20236,
+      "grad_norm": 1.3573496414235882,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 20236
+    },
+    {
+      "epoch": 0.20237,
+      "grad_norm": 1.2282065372452702,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 20237
+    },
+    {
+      "epoch": 0.20238,
+      "grad_norm": 1.3782178627983412,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 20238
+    },
+    {
+      "epoch": 0.20239,
+      "grad_norm": 1.1152356313993674,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 20239
+    },
+    {
+      "epoch": 0.2024,
+      "grad_norm": 1.2132838784905071,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 20240
+    },
+    {
+      "epoch": 0.20241,
+      "grad_norm": 1.3542371912523392,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 20241
+    },
+    {
+      "epoch": 0.20242,
+      "grad_norm": 1.3079189369228095,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 20242
+    },
+    {
+      "epoch": 0.20243,
+      "grad_norm": 1.1961243401791775,
+      "learning_rate": 0.003,
+      "loss": 3.9666,
+      "step": 20243
+    },
+    {
+      "epoch": 0.20244,
+      "grad_norm": 1.4313341587358037,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 20244
+    },
+    {
+      "epoch": 0.20245,
+      "grad_norm": 1.2704524820960144,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 20245
+    },
+    {
+      "epoch": 0.20246,
+      "grad_norm": 1.3037494474167584,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 20246
+    },
+    {
+      "epoch": 0.20247,
+      "grad_norm": 1.2560248322424168,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 20247
+    },
+    {
+      "epoch": 0.20248,
+      "grad_norm": 1.1988705173723269,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 20248
+    },
+    {
+      "epoch": 0.20249,
+      "grad_norm": 1.2617050127120686,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 20249
+    },
+    {
+      "epoch": 0.2025,
+      "grad_norm": 1.2027070240866309,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 20250
+    },
+    {
+      "epoch": 0.20251,
+      "grad_norm": 1.2485388543546203,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 20251
+    },
+    {
+      "epoch": 0.20252,
+      "grad_norm": 1.1196564115044871,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 20252
+    },
+    {
+      "epoch": 0.20253,
+      "grad_norm": 1.3536692154318077,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 20253
+    },
+    {
+      "epoch": 0.20254,
+      "grad_norm": 1.2757481487301507,
+      "learning_rate": 0.003,
+      "loss": 4.0516,
+      "step": 20254
+    },
+    {
+      "epoch": 0.20255,
+      "grad_norm": 1.3087604055792272,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 20255
+    },
+    {
+      "epoch": 0.20256,
+      "grad_norm": 1.2702260923996773,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 20256
+    },
+    {
+      "epoch": 0.20257,
+      "grad_norm": 1.5783167815565562,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 20257
+    },
+    {
+      "epoch": 0.20258,
+      "grad_norm": 1.1697443585360252,
+      "learning_rate": 0.003,
+      "loss": 3.9689,
+      "step": 20258
+    },
+    {
+      "epoch": 0.20259,
+      "grad_norm": 1.5605518636585392,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 20259
+    },
+    {
+      "epoch": 0.2026,
+      "grad_norm": 1.085020225876315,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 20260
+    },
+    {
+      "epoch": 0.20261,
+      "grad_norm": 1.3576484248038343,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 20261
+    },
+    {
+      "epoch": 0.20262,
+      "grad_norm": 1.1881881509307515,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 20262
+    },
+    {
+      "epoch": 0.20263,
+      "grad_norm": 1.2971654316092998,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 20263
+    },
+    {
+      "epoch": 0.20264,
+      "grad_norm": 1.3158425439657275,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 20264
+    },
+    {
+      "epoch": 0.20265,
+      "grad_norm": 1.082100710781211,
+      "learning_rate": 0.003,
+      "loss": 3.9768,
+      "step": 20265
+    },
+    {
+      "epoch": 0.20266,
+      "grad_norm": 1.5878331474512877,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 20266
+    },
+    {
+      "epoch": 0.20267,
+      "grad_norm": 1.185043746435416,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 20267
+    },
+    {
+      "epoch": 0.20268,
+      "grad_norm": 1.3374171610687906,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 20268
+    },
+    {
+      "epoch": 0.20269,
+      "grad_norm": 1.0569770290809042,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 20269
+    },
+    {
+      "epoch": 0.2027,
+      "grad_norm": 1.4211386909396528,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 20270
+    },
+    {
+      "epoch": 0.20271,
+      "grad_norm": 1.380398469753099,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 20271
+    },
+    {
+      "epoch": 0.20272,
+      "grad_norm": 1.2613656323029032,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 20272
+    },
+    {
+      "epoch": 0.20273,
+      "grad_norm": 1.58425620799395,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 20273
+    },
+    {
+      "epoch": 0.20274,
+      "grad_norm": 1.0041151675612472,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 20274
+    },
+    {
+      "epoch": 0.20275,
+      "grad_norm": 1.4728703225784694,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 20275
+    },
+    {
+      "epoch": 0.20276,
+      "grad_norm": 1.0492474335040847,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 20276
+    },
+    {
+      "epoch": 0.20277,
+      "grad_norm": 1.4959742596402308,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 20277
+    },
+    {
+      "epoch": 0.20278,
+      "grad_norm": 1.3857891714823416,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 20278
+    },
+    {
+      "epoch": 0.20279,
+      "grad_norm": 1.3188951749936073,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 20279
+    },
+    {
+      "epoch": 0.2028,
+      "grad_norm": 1.3213419081138378,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 20280
+    },
+    {
+      "epoch": 0.20281,
+      "grad_norm": 1.2136789329059783,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 20281
+    },
+    {
+      "epoch": 0.20282,
+      "grad_norm": 1.4007399486198164,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 20282
+    },
+    {
+      "epoch": 0.20283,
+      "grad_norm": 1.2375760425253393,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 20283
+    },
+    {
+      "epoch": 0.20284,
+      "grad_norm": 1.4128467117262387,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 20284
+    },
+    {
+      "epoch": 0.20285,
+      "grad_norm": 1.0545780614995544,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 20285
+    },
+    {
+      "epoch": 0.20286,
+      "grad_norm": 1.2774852180254666,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 20286
+    },
+    {
+      "epoch": 0.20287,
+      "grad_norm": 1.22659554413782,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 20287
+    },
+    {
+      "epoch": 0.20288,
+      "grad_norm": 1.2571524748307277,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 20288
+    },
+    {
+      "epoch": 0.20289,
+      "grad_norm": 1.3115559107561467,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 20289
+    },
+    {
+      "epoch": 0.2029,
+      "grad_norm": 1.204490346438109,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 20290
+    },
+    {
+      "epoch": 0.20291,
+      "grad_norm": 1.3761764888272403,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 20291
+    },
+    {
+      "epoch": 0.20292,
+      "grad_norm": 1.2688526439417949,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 20292
+    },
+    {
+      "epoch": 0.20293,
+      "grad_norm": 1.2054148313170274,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 20293
+    },
+    {
+      "epoch": 0.20294,
+      "grad_norm": 1.33196114027554,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 20294
+    },
+    {
+      "epoch": 0.20295,
+      "grad_norm": 1.2346252883165996,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 20295
+    },
+    {
+      "epoch": 0.20296,
+      "grad_norm": 1.4734618197484464,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 20296
+    },
+    {
+      "epoch": 0.20297,
+      "grad_norm": 1.1111987448681402,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 20297
+    },
+    {
+      "epoch": 0.20298,
+      "grad_norm": 1.1061908155011686,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 20298
+    },
+    {
+      "epoch": 0.20299,
+      "grad_norm": 1.3725650724168703,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 20299
+    },
+    {
+      "epoch": 0.203,
+      "grad_norm": 1.4686507083639422,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 20300
+    },
+    {
+      "epoch": 0.20301,
+      "grad_norm": 1.311929867892505,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 20301
+    },
+    {
+      "epoch": 0.20302,
+      "grad_norm": 1.2465306464884274,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 20302
+    },
+    {
+      "epoch": 0.20303,
+      "grad_norm": 1.3766970797515607,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 20303
+    },
+    {
+      "epoch": 0.20304,
+      "grad_norm": 1.2067539414401074,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 20304
+    },
+    {
+      "epoch": 0.20305,
+      "grad_norm": 1.1205991706530403,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 20305
+    },
+    {
+      "epoch": 0.20306,
+      "grad_norm": 1.3473746947655052,
+      "learning_rate": 0.003,
+      "loss": 3.9847,
+      "step": 20306
+    },
+    {
+      "epoch": 0.20307,
+      "grad_norm": 1.0050786521679067,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 20307
+    },
+    {
+      "epoch": 0.20308,
+      "grad_norm": 1.5771708800047481,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 20308
+    },
+    {
+      "epoch": 0.20309,
+      "grad_norm": 1.1222002278931142,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 20309
+    },
+    {
+      "epoch": 0.2031,
+      "grad_norm": 1.58717189855105,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 20310
+    },
+    {
+      "epoch": 0.20311,
+      "grad_norm": 1.1010743064375048,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 20311
+    },
+    {
+      "epoch": 0.20312,
+      "grad_norm": 1.4719661864251636,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 20312
+    },
+    {
+      "epoch": 0.20313,
+      "grad_norm": 1.1210987798148684,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 20313
+    },
+    {
+      "epoch": 0.20314,
+      "grad_norm": 1.2614937931604777,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 20314
+    },
+    {
+      "epoch": 0.20315,
+      "grad_norm": 1.3978311219468802,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 20315
+    },
+    {
+      "epoch": 0.20316,
+      "grad_norm": 1.2264249194484476,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 20316
+    },
+    {
+      "epoch": 0.20317,
+      "grad_norm": 1.2595157772698269,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 20317
+    },
+    {
+      "epoch": 0.20318,
+      "grad_norm": 1.348121275472313,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 20318
+    },
+    {
+      "epoch": 0.20319,
+      "grad_norm": 1.2066827119276617,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 20319
+    },
+    {
+      "epoch": 0.2032,
+      "grad_norm": 1.349925078091736,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 20320
+    },
+    {
+      "epoch": 0.20321,
+      "grad_norm": 1.2298683880325787,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 20321
+    },
+    {
+      "epoch": 0.20322,
+      "grad_norm": 1.1907032866789202,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 20322
+    },
+    {
+      "epoch": 0.20323,
+      "grad_norm": 1.2956137255808344,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 20323
+    },
+    {
+      "epoch": 0.20324,
+      "grad_norm": 1.311864433855721,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 20324
+    },
+    {
+      "epoch": 0.20325,
+      "grad_norm": 1.2000243918958653,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 20325
+    },
+    {
+      "epoch": 0.20326,
+      "grad_norm": 1.319566270131484,
+      "learning_rate": 0.003,
+      "loss": 3.9664,
+      "step": 20326
+    },
+    {
+      "epoch": 0.20327,
+      "grad_norm": 1.2385489398116172,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 20327
+    },
+    {
+      "epoch": 0.20328,
+      "grad_norm": 1.7402875384851648,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 20328
+    },
+    {
+      "epoch": 0.20329,
+      "grad_norm": 1.2492891000778201,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 20329
+    },
+    {
+      "epoch": 0.2033,
+      "grad_norm": 1.3983382796279817,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 20330
+    },
+    {
+      "epoch": 0.20331,
+      "grad_norm": 1.2690542054828082,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 20331
+    },
+    {
+      "epoch": 0.20332,
+      "grad_norm": 1.2491676004712835,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 20332
+    },
+    {
+      "epoch": 0.20333,
+      "grad_norm": 1.300337776590903,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 20333
+    },
+    {
+      "epoch": 0.20334,
+      "grad_norm": 1.1509550530041472,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 20334
+    },
+    {
+      "epoch": 0.20335,
+      "grad_norm": 1.2354439082953654,
+      "learning_rate": 0.003,
+      "loss": 3.9735,
+      "step": 20335
+    },
+    {
+      "epoch": 0.20336,
+      "grad_norm": 1.2748845950178649,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 20336
+    },
+    {
+      "epoch": 0.20337,
+      "grad_norm": 1.0392136880108995,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 20337
+    },
+    {
+      "epoch": 0.20338,
+      "grad_norm": 1.435576618462437,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 20338
+    },
+    {
+      "epoch": 0.20339,
+      "grad_norm": 1.0032635361648778,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 20339
+    },
+    {
+      "epoch": 0.2034,
+      "grad_norm": 1.4168747456382615,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 20340
+    },
+    {
+      "epoch": 0.20341,
+      "grad_norm": 1.1994839027062312,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 20341
+    },
+    {
+      "epoch": 0.20342,
+      "grad_norm": 1.4958144371532647,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 20342
+    },
+    {
+      "epoch": 0.20343,
+      "grad_norm": 1.2129980120860568,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 20343
+    },
+    {
+      "epoch": 0.20344,
+      "grad_norm": 1.3674163352739366,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 20344
+    },
+    {
+      "epoch": 0.20345,
+      "grad_norm": 1.5189368288173968,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 20345
+    },
+    {
+      "epoch": 0.20346,
+      "grad_norm": 1.1740054256478134,
+      "learning_rate": 0.003,
+      "loss": 3.9733,
+      "step": 20346
+    },
+    {
+      "epoch": 0.20347,
+      "grad_norm": 1.506462112719063,
+      "learning_rate": 0.003,
+      "loss": 4.0295,
+      "step": 20347
+    },
+    {
+      "epoch": 0.20348,
+      "grad_norm": 1.1086995123193881,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 20348
+    },
+    {
+      "epoch": 0.20349,
+      "grad_norm": 1.5094925324332276,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 20349
+    },
+    {
+      "epoch": 0.2035,
+      "grad_norm": 1.0771995990686078,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 20350
+    },
+    {
+      "epoch": 0.20351,
+      "grad_norm": 1.6950364503780027,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 20351
+    },
+    {
+      "epoch": 0.20352,
+      "grad_norm": 1.0174841625125581,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 20352
+    },
+    {
+      "epoch": 0.20353,
+      "grad_norm": 1.3580540148388978,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 20353
+    },
+    {
+      "epoch": 0.20354,
+      "grad_norm": 1.2465835232100113,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 20354
+    },
+    {
+      "epoch": 0.20355,
+      "grad_norm": 1.349436996143901,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 20355
+    },
+    {
+      "epoch": 0.20356,
+      "grad_norm": 1.1724588765158126,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 20356
+    },
+    {
+      "epoch": 0.20357,
+      "grad_norm": 1.256484708783395,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 20357
+    },
+    {
+      "epoch": 0.20358,
+      "grad_norm": 1.239249040314291,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 20358
+    },
+    {
+      "epoch": 0.20359,
+      "grad_norm": 1.3694637971205457,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 20359
+    },
+    {
+      "epoch": 0.2036,
+      "grad_norm": 1.5179212255135088,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 20360
+    },
+    {
+      "epoch": 0.20361,
+      "grad_norm": 1.2100335206389845,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 20361
+    },
+    {
+      "epoch": 0.20362,
+      "grad_norm": 1.3114906979666825,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 20362
+    },
+    {
+      "epoch": 0.20363,
+      "grad_norm": 1.2595435929020353,
+      "learning_rate": 0.003,
+      "loss": 3.9698,
+      "step": 20363
+    },
+    {
+      "epoch": 0.20364,
+      "grad_norm": 1.249281565321927,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 20364
+    },
+    {
+      "epoch": 0.20365,
+      "grad_norm": 1.193766439501634,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 20365
+    },
+    {
+      "epoch": 0.20366,
+      "grad_norm": 1.6214554865486757,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 20366
+    },
+    {
+      "epoch": 0.20367,
+      "grad_norm": 0.9978285168413173,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 20367
+    },
+    {
+      "epoch": 0.20368,
+      "grad_norm": 1.272986813494103,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 20368
+    },
+    {
+      "epoch": 0.20369,
+      "grad_norm": 1.1673062224197925,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 20369
+    },
+    {
+      "epoch": 0.2037,
+      "grad_norm": 1.4150841339577258,
+      "learning_rate": 0.003,
+      "loss": 4.0377,
+      "step": 20370
+    },
+    {
+      "epoch": 0.20371,
+      "grad_norm": 1.1221728601775236,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 20371
+    },
+    {
+      "epoch": 0.20372,
+      "grad_norm": 1.4249595263589507,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 20372
+    },
+    {
+      "epoch": 0.20373,
+      "grad_norm": 1.2627194119411804,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 20373
+    },
+    {
+      "epoch": 0.20374,
+      "grad_norm": 1.4703928646398432,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 20374
+    },
+    {
+      "epoch": 0.20375,
+      "grad_norm": 1.1086741663890378,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 20375
+    },
+    {
+      "epoch": 0.20376,
+      "grad_norm": 1.4777891980567714,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 20376
+    },
+    {
+      "epoch": 0.20377,
+      "grad_norm": 1.307666498511952,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 20377
+    },
+    {
+      "epoch": 0.20378,
+      "grad_norm": 1.270419756620071,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 20378
+    },
+    {
+      "epoch": 0.20379,
+      "grad_norm": 1.2940595355574276,
+      "learning_rate": 0.003,
+      "loss": 4.0386,
+      "step": 20379
+    },
+    {
+      "epoch": 0.2038,
+      "grad_norm": 1.3719520445103808,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 20380
+    },
+    {
+      "epoch": 0.20381,
+      "grad_norm": 1.3550286427736966,
+      "learning_rate": 0.003,
+      "loss": 3.9763,
+      "step": 20381
+    },
+    {
+      "epoch": 0.20382,
+      "grad_norm": 1.2719617029079637,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 20382
+    },
+    {
+      "epoch": 0.20383,
+      "grad_norm": 1.4371074919695377,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 20383
+    },
+    {
+      "epoch": 0.20384,
+      "grad_norm": 1.1059137253501337,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 20384
+    },
+    {
+      "epoch": 0.20385,
+      "grad_norm": 1.463132360637437,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 20385
+    },
+    {
+      "epoch": 0.20386,
+      "grad_norm": 1.1249567812697288,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 20386
+    },
+    {
+      "epoch": 0.20387,
+      "grad_norm": 1.3454617764373504,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 20387
+    },
+    {
+      "epoch": 0.20388,
+      "grad_norm": 1.0242291647934376,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 20388
+    },
+    {
+      "epoch": 0.20389,
+      "grad_norm": 1.5980101580086403,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 20389
+    },
+    {
+      "epoch": 0.2039,
+      "grad_norm": 1.1519951887381599,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 20390
+    },
+    {
+      "epoch": 0.20391,
+      "grad_norm": 1.3341951637510467,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 20391
+    },
+    {
+      "epoch": 0.20392,
+      "grad_norm": 1.171678602916609,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 20392
+    },
+    {
+      "epoch": 0.20393,
+      "grad_norm": 1.3382638436681917,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 20393
+    },
+    {
+      "epoch": 0.20394,
+      "grad_norm": 1.071362031614952,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 20394
+    },
+    {
+      "epoch": 0.20395,
+      "grad_norm": 1.4081144402118346,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 20395
+    },
+    {
+      "epoch": 0.20396,
+      "grad_norm": 1.315394098695792,
+      "learning_rate": 0.003,
+      "loss": 3.9805,
+      "step": 20396
+    },
+    {
+      "epoch": 0.20397,
+      "grad_norm": 1.3713805429562576,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 20397
+    },
+    {
+      "epoch": 0.20398,
+      "grad_norm": 1.1772341755348297,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 20398
+    },
+    {
+      "epoch": 0.20399,
+      "grad_norm": 1.4554876783263795,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 20399
+    },
+    {
+      "epoch": 0.204,
+      "grad_norm": 1.1787335540120323,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 20400
+    },
+    {
+      "epoch": 0.20401,
+      "grad_norm": 1.2396339898475883,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 20401
+    },
+    {
+      "epoch": 0.20402,
+      "grad_norm": 1.1748562227798123,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 20402
+    },
+    {
+      "epoch": 0.20403,
+      "grad_norm": 1.2576506215414536,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 20403
+    },
+    {
+      "epoch": 0.20404,
+      "grad_norm": 1.349130787830397,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 20404
+    },
+    {
+      "epoch": 0.20405,
+      "grad_norm": 1.0989732487986719,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 20405
+    },
+    {
+      "epoch": 0.20406,
+      "grad_norm": 1.3685258926173318,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 20406
+    },
+    {
+      "epoch": 0.20407,
+      "grad_norm": 1.1433116123180145,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 20407
+    },
+    {
+      "epoch": 0.20408,
+      "grad_norm": 1.2906198313755357,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 20408
+    },
+    {
+      "epoch": 0.20409,
+      "grad_norm": 1.3826855036176626,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 20409
+    },
+    {
+      "epoch": 0.2041,
+      "grad_norm": 1.345009129641235,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 20410
+    },
+    {
+      "epoch": 0.20411,
+      "grad_norm": 1.2059078273944797,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 20411
+    },
+    {
+      "epoch": 0.20412,
+      "grad_norm": 1.443846880230482,
+      "learning_rate": 0.003,
+      "loss": 3.9805,
+      "step": 20412
+    },
+    {
+      "epoch": 0.20413,
+      "grad_norm": 1.0714659998400948,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 20413
+    },
+    {
+      "epoch": 0.20414,
+      "grad_norm": 1.717200174768339,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 20414
+    },
+    {
+      "epoch": 0.20415,
+      "grad_norm": 1.0817003879923672,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 20415
+    },
+    {
+      "epoch": 0.20416,
+      "grad_norm": 1.5647546641462835,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 20416
+    },
+    {
+      "epoch": 0.20417,
+      "grad_norm": 1.1839582528126777,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 20417
+    },
+    {
+      "epoch": 0.20418,
+      "grad_norm": 1.2565920426717236,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 20418
+    },
+    {
+      "epoch": 0.20419,
+      "grad_norm": 1.209842104848665,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 20419
+    },
+    {
+      "epoch": 0.2042,
+      "grad_norm": 1.2761959568325196,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 20420
+    },
+    {
+      "epoch": 0.20421,
+      "grad_norm": 1.437332262772935,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 20421
+    },
+    {
+      "epoch": 0.20422,
+      "grad_norm": 1.1289685166944816,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 20422
+    },
+    {
+      "epoch": 0.20423,
+      "grad_norm": 1.6683403434908055,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 20423
+    },
+    {
+      "epoch": 0.20424,
+      "grad_norm": 1.011401007418477,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 20424
+    },
+    {
+      "epoch": 0.20425,
+      "grad_norm": 1.636163206529611,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 20425
+    },
+    {
+      "epoch": 0.20426,
+      "grad_norm": 1.1235362794582984,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 20426
+    },
+    {
+      "epoch": 0.20427,
+      "grad_norm": 1.3751719462148506,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 20427
+    },
+    {
+      "epoch": 0.20428,
+      "grad_norm": 1.278148454238994,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 20428
+    },
+    {
+      "epoch": 0.20429,
+      "grad_norm": 1.4640440816171707,
+      "learning_rate": 0.003,
+      "loss": 3.9872,
+      "step": 20429
+    },
+    {
+      "epoch": 0.2043,
+      "grad_norm": 1.267962245071738,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 20430
+    },
+    {
+      "epoch": 0.20431,
+      "grad_norm": 1.4487367083307598,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 20431
+    },
+    {
+      "epoch": 0.20432,
+      "grad_norm": 1.0832277310407208,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 20432
+    },
+    {
+      "epoch": 0.20433,
+      "grad_norm": 1.4954859693244373,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 20433
+    },
+    {
+      "epoch": 0.20434,
+      "grad_norm": 1.3055492118407634,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 20434
+    },
+    {
+      "epoch": 0.20435,
+      "grad_norm": 1.3318250615225313,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 20435
+    },
+    {
+      "epoch": 0.20436,
+      "grad_norm": 1.2545448209890644,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 20436
+    },
+    {
+      "epoch": 0.20437,
+      "grad_norm": 1.3607302696820325,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 20437
+    },
+    {
+      "epoch": 0.20438,
+      "grad_norm": 1.3634086582500857,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 20438
+    },
+    {
+      "epoch": 0.20439,
+      "grad_norm": 1.2268248002932576,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 20439
+    },
+    {
+      "epoch": 0.2044,
+      "grad_norm": 1.2174080720851592,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 20440
+    },
+    {
+      "epoch": 0.20441,
+      "grad_norm": 1.4371734678946444,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 20441
+    },
+    {
+      "epoch": 0.20442,
+      "grad_norm": 1.1237183186967938,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 20442
+    },
+    {
+      "epoch": 0.20443,
+      "grad_norm": 1.5692385820880266,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 20443
+    },
+    {
+      "epoch": 0.20444,
+      "grad_norm": 1.0562477382228892,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 20444
+    },
+    {
+      "epoch": 0.20445,
+      "grad_norm": 1.4161033690224094,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 20445
+    },
+    {
+      "epoch": 0.20446,
+      "grad_norm": 1.10093812404361,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 20446
+    },
+    {
+      "epoch": 0.20447,
+      "grad_norm": 1.3646182883604736,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 20447
+    },
+    {
+      "epoch": 0.20448,
+      "grad_norm": 1.218721734363518,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 20448
+    },
+    {
+      "epoch": 0.20449,
+      "grad_norm": 1.2759893784129883,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 20449
+    },
+    {
+      "epoch": 0.2045,
+      "grad_norm": 1.2871640711368166,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 20450
+    },
+    {
+      "epoch": 0.20451,
+      "grad_norm": 1.1328891508989805,
+      "learning_rate": 0.003,
+      "loss": 3.9837,
+      "step": 20451
+    },
+    {
+      "epoch": 0.20452,
+      "grad_norm": 1.323593248870549,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 20452
+    },
+    {
+      "epoch": 0.20453,
+      "grad_norm": 1.3823404967514885,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 20453
+    },
+    {
+      "epoch": 0.20454,
+      "grad_norm": 1.1873097121479368,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 20454
+    },
+    {
+      "epoch": 0.20455,
+      "grad_norm": 1.5752740333417847,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 20455
+    },
+    {
+      "epoch": 0.20456,
+      "grad_norm": 1.4173233047120901,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 20456
+    },
+    {
+      "epoch": 0.20457,
+      "grad_norm": 1.0202018052306596,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 20457
+    },
+    {
+      "epoch": 0.20458,
+      "grad_norm": 1.4564651253747352,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 20458
+    },
+    {
+      "epoch": 0.20459,
+      "grad_norm": 1.179781747035679,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 20459
+    },
+    {
+      "epoch": 0.2046,
+      "grad_norm": 1.413425678978077,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 20460
+    },
+    {
+      "epoch": 0.20461,
+      "grad_norm": 1.2238887889743972,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 20461
+    },
+    {
+      "epoch": 0.20462,
+      "grad_norm": 1.2112588556400181,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 20462
+    },
+    {
+      "epoch": 0.20463,
+      "grad_norm": 1.307173538461754,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 20463
+    },
+    {
+      "epoch": 0.20464,
+      "grad_norm": 1.3423813083580993,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 20464
+    },
+    {
+      "epoch": 0.20465,
+      "grad_norm": 1.4136149858559084,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 20465
+    },
+    {
+      "epoch": 0.20466,
+      "grad_norm": 1.1582909044152148,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 20466
+    },
+    {
+      "epoch": 0.20467,
+      "grad_norm": 1.439848593107039,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 20467
+    },
+    {
+      "epoch": 0.20468,
+      "grad_norm": 1.0109651289096517,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 20468
+    },
+    {
+      "epoch": 0.20469,
+      "grad_norm": 1.4419813336186957,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 20469
+    },
+    {
+      "epoch": 0.2047,
+      "grad_norm": 1.1688445164993988,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 20470
+    },
+    {
+      "epoch": 0.20471,
+      "grad_norm": 1.2885229310012967,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 20471
+    },
+    {
+      "epoch": 0.20472,
+      "grad_norm": 1.2452530406815392,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 20472
+    },
+    {
+      "epoch": 0.20473,
+      "grad_norm": 1.2833320118159188,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 20473
+    },
+    {
+      "epoch": 0.20474,
+      "grad_norm": 1.583866958885907,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 20474
+    },
+    {
+      "epoch": 0.20475,
+      "grad_norm": 0.9941341306814071,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 20475
+    },
+    {
+      "epoch": 0.20476,
+      "grad_norm": 1.5365651920742551,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 20476
+    },
+    {
+      "epoch": 0.20477,
+      "grad_norm": 0.9652598829705031,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 20477
+    },
+    {
+      "epoch": 0.20478,
+      "grad_norm": 1.3759081216424855,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 20478
+    },
+    {
+      "epoch": 0.20479,
+      "grad_norm": 1.3101232314467952,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 20479
+    },
+    {
+      "epoch": 0.2048,
+      "grad_norm": 1.0539585629552575,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 20480
+    },
+    {
+      "epoch": 0.20481,
+      "grad_norm": 1.3541991428277784,
+      "learning_rate": 0.003,
+      "loss": 3.9643,
+      "step": 20481
+    },
+    {
+      "epoch": 0.20482,
+      "grad_norm": 1.3449523695755412,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 20482
+    },
+    {
+      "epoch": 0.20483,
+      "grad_norm": 1.1738595670645797,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 20483
+    },
+    {
+      "epoch": 0.20484,
+      "grad_norm": 1.376693017031834,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 20484
+    },
+    {
+      "epoch": 0.20485,
+      "grad_norm": 1.2564985368181085,
+      "learning_rate": 0.003,
+      "loss": 4.0412,
+      "step": 20485
+    },
+    {
+      "epoch": 0.20486,
+      "grad_norm": 1.3424955727366965,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 20486
+    },
+    {
+      "epoch": 0.20487,
+      "grad_norm": 1.3579864662806096,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 20487
+    },
+    {
+      "epoch": 0.20488,
+      "grad_norm": 1.0884526655820375,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 20488
+    },
+    {
+      "epoch": 0.20489,
+      "grad_norm": 1.632075715263895,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 20489
+    },
+    {
+      "epoch": 0.2049,
+      "grad_norm": 1.0796499689351091,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 20490
+    },
+    {
+      "epoch": 0.20491,
+      "grad_norm": 1.3384187872573343,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 20491
+    },
+    {
+      "epoch": 0.20492,
+      "grad_norm": 1.2574063732160432,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 20492
+    },
+    {
+      "epoch": 0.20493,
+      "grad_norm": 1.269768376871494,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 20493
+    },
+    {
+      "epoch": 0.20494,
+      "grad_norm": 1.3966390008879743,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 20494
+    },
+    {
+      "epoch": 0.20495,
+      "grad_norm": 1.167124198151884,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 20495
+    },
+    {
+      "epoch": 0.20496,
+      "grad_norm": 1.5837550364400803,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 20496
+    },
+    {
+      "epoch": 0.20497,
+      "grad_norm": 1.135556661155727,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 20497
+    },
+    {
+      "epoch": 0.20498,
+      "grad_norm": 1.5459802109692873,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 20498
+    },
+    {
+      "epoch": 0.20499,
+      "grad_norm": 1.100762358773126,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 20499
+    },
+    {
+      "epoch": 0.205,
+      "grad_norm": 1.3789655931433107,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 20500
+    },
+    {
+      "epoch": 0.20501,
+      "grad_norm": 1.2451891187234967,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 20501
+    },
+    {
+      "epoch": 0.20502,
+      "grad_norm": 1.1309611233823003,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 20502
+    },
+    {
+      "epoch": 0.20503,
+      "grad_norm": 1.2604068246082096,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 20503
+    },
+    {
+      "epoch": 0.20504,
+      "grad_norm": 1.1839926447967617,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 20504
+    },
+    {
+      "epoch": 0.20505,
+      "grad_norm": 1.4026805005182839,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 20505
+    },
+    {
+      "epoch": 0.20506,
+      "grad_norm": 1.2020012719076192,
+      "learning_rate": 0.003,
+      "loss": 4.0662,
+      "step": 20506
+    },
+    {
+      "epoch": 0.20507,
+      "grad_norm": 1.113617135034334,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 20507
+    },
+    {
+      "epoch": 0.20508,
+      "grad_norm": 1.1275545486192737,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 20508
+    },
+    {
+      "epoch": 0.20509,
+      "grad_norm": 1.1593430722890004,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 20509
+    },
+    {
+      "epoch": 0.2051,
+      "grad_norm": 1.40481022329612,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 20510
+    },
+    {
+      "epoch": 0.20511,
+      "grad_norm": 1.2445624335163232,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 20511
+    },
+    {
+      "epoch": 0.20512,
+      "grad_norm": 1.377467179865664,
+      "learning_rate": 0.003,
+      "loss": 3.9822,
+      "step": 20512
+    },
+    {
+      "epoch": 0.20513,
+      "grad_norm": 1.23005556537273,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 20513
+    },
+    {
+      "epoch": 0.20514,
+      "grad_norm": 1.3812950509838113,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 20514
+    },
+    {
+      "epoch": 0.20515,
+      "grad_norm": 1.2170981103454575,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 20515
+    },
+    {
+      "epoch": 0.20516,
+      "grad_norm": 1.3998682052656384,
+      "learning_rate": 0.003,
+      "loss": 3.9806,
+      "step": 20516
+    },
+    {
+      "epoch": 0.20517,
+      "grad_norm": 1.1960498568543985,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 20517
+    },
+    {
+      "epoch": 0.20518,
+      "grad_norm": 1.3997913391279395,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 20518
+    },
+    {
+      "epoch": 0.20519,
+      "grad_norm": 1.1549659703499877,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 20519
+    },
+    {
+      "epoch": 0.2052,
+      "grad_norm": 1.3553112474695659,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 20520
+    },
+    {
+      "epoch": 0.20521,
+      "grad_norm": 1.4761498854844728,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 20521
+    },
+    {
+      "epoch": 0.20522,
+      "grad_norm": 1.2014863966468765,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 20522
+    },
+    {
+      "epoch": 0.20523,
+      "grad_norm": 1.3065676257084342,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 20523
+    },
+    {
+      "epoch": 0.20524,
+      "grad_norm": 1.0765788551663926,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 20524
+    },
+    {
+      "epoch": 0.20525,
+      "grad_norm": 1.39573503177698,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 20525
+    },
+    {
+      "epoch": 0.20526,
+      "grad_norm": 1.2895510254939981,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 20526
+    },
+    {
+      "epoch": 0.20527,
+      "grad_norm": 1.283121855455,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 20527
+    },
+    {
+      "epoch": 0.20528,
+      "grad_norm": 1.6461266020632557,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 20528
+    },
+    {
+      "epoch": 0.20529,
+      "grad_norm": 1.070997454164778,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 20529
+    },
+    {
+      "epoch": 0.2053,
+      "grad_norm": 1.6011188993117524,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 20530
+    },
+    {
+      "epoch": 0.20531,
+      "grad_norm": 1.1726257479000965,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 20531
+    },
+    {
+      "epoch": 0.20532,
+      "grad_norm": 1.333611663197986,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 20532
+    },
+    {
+      "epoch": 0.20533,
+      "grad_norm": 1.3281196087521732,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 20533
+    },
+    {
+      "epoch": 0.20534,
+      "grad_norm": 1.275063232716974,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 20534
+    },
+    {
+      "epoch": 0.20535,
+      "grad_norm": 1.399575421257767,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 20535
+    },
+    {
+      "epoch": 0.20536,
+      "grad_norm": 1.4609737493111625,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 20536
+    },
+    {
+      "epoch": 0.20537,
+      "grad_norm": 1.289700499556751,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 20537
+    },
+    {
+      "epoch": 0.20538,
+      "grad_norm": 1.1532533117400263,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 20538
+    },
+    {
+      "epoch": 0.20539,
+      "grad_norm": 1.3673726798115244,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 20539
+    },
+    {
+      "epoch": 0.2054,
+      "grad_norm": 1.0990584803222765,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 20540
+    },
+    {
+      "epoch": 0.20541,
+      "grad_norm": 1.2842097728635957,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 20541
+    },
+    {
+      "epoch": 0.20542,
+      "grad_norm": 1.2840210767235878,
+      "learning_rate": 0.003,
+      "loss": 4.064,
+      "step": 20542
+    },
+    {
+      "epoch": 0.20543,
+      "grad_norm": 1.419649350128111,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 20543
+    },
+    {
+      "epoch": 0.20544,
+      "grad_norm": 1.2523549449045448,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 20544
+    },
+    {
+      "epoch": 0.20545,
+      "grad_norm": 1.4074485733227524,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 20545
+    },
+    {
+      "epoch": 0.20546,
+      "grad_norm": 1.061800238651892,
+      "learning_rate": 0.003,
+      "loss": 3.9707,
+      "step": 20546
+    },
+    {
+      "epoch": 0.20547,
+      "grad_norm": 1.4625601184576438,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 20547
+    },
+    {
+      "epoch": 0.20548,
+      "grad_norm": 1.3911019266503366,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 20548
+    },
+    {
+      "epoch": 0.20549,
+      "grad_norm": 1.3791685530944973,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 20549
+    },
+    {
+      "epoch": 0.2055,
+      "grad_norm": 1.240972813150801,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 20550
+    },
+    {
+      "epoch": 0.20551,
+      "grad_norm": 1.1971776526888327,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 20551
+    },
+    {
+      "epoch": 0.20552,
+      "grad_norm": 1.5060869965067512,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 20552
+    },
+    {
+      "epoch": 0.20553,
+      "grad_norm": 1.114017667915659,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 20553
+    },
+    {
+      "epoch": 0.20554,
+      "grad_norm": 1.4261085445517363,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 20554
+    },
+    {
+      "epoch": 0.20555,
+      "grad_norm": 1.1786318999064316,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 20555
+    },
+    {
+      "epoch": 0.20556,
+      "grad_norm": 1.339959236345092,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 20556
+    },
+    {
+      "epoch": 0.20557,
+      "grad_norm": 1.1892416002023873,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 20557
+    },
+    {
+      "epoch": 0.20558,
+      "grad_norm": 1.3826411968223087,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 20558
+    },
+    {
+      "epoch": 0.20559,
+      "grad_norm": 1.3639245730004423,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 20559
+    },
+    {
+      "epoch": 0.2056,
+      "grad_norm": 0.9666214138445555,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 20560
+    },
+    {
+      "epoch": 0.20561,
+      "grad_norm": 1.4847671648655332,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 20561
+    },
+    {
+      "epoch": 0.20562,
+      "grad_norm": 1.1222047212643582,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 20562
+    },
+    {
+      "epoch": 0.20563,
+      "grad_norm": 1.5720280325897276,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 20563
+    },
+    {
+      "epoch": 0.20564,
+      "grad_norm": 1.256016027297345,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 20564
+    },
+    {
+      "epoch": 0.20565,
+      "grad_norm": 1.1215394340087894,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 20565
+    },
+    {
+      "epoch": 0.20566,
+      "grad_norm": 1.3320378335509828,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 20566
+    },
+    {
+      "epoch": 0.20567,
+      "grad_norm": 1.1266204339000965,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 20567
+    },
+    {
+      "epoch": 0.20568,
+      "grad_norm": 1.2947269271197213,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 20568
+    },
+    {
+      "epoch": 0.20569,
+      "grad_norm": 1.323547470360493,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 20569
+    },
+    {
+      "epoch": 0.2057,
+      "grad_norm": 1.3808605806119016,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 20570
+    },
+    {
+      "epoch": 0.20571,
+      "grad_norm": 1.1226757786628874,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 20571
+    },
+    {
+      "epoch": 0.20572,
+      "grad_norm": 1.4520121655408023,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 20572
+    },
+    {
+      "epoch": 0.20573,
+      "grad_norm": 1.2799293567575476,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 20573
+    },
+    {
+      "epoch": 0.20574,
+      "grad_norm": 1.189555045205036,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 20574
+    },
+    {
+      "epoch": 0.20575,
+      "grad_norm": 1.2369487733048843,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 20575
+    },
+    {
+      "epoch": 0.20576,
+      "grad_norm": 1.2638883091347621,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 20576
+    },
+    {
+      "epoch": 0.20577,
+      "grad_norm": 1.472620673991004,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 20577
+    },
+    {
+      "epoch": 0.20578,
+      "grad_norm": 1.0513629693754731,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 20578
+    },
+    {
+      "epoch": 0.20579,
+      "grad_norm": 1.545249525696994,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 20579
+    },
+    {
+      "epoch": 0.2058,
+      "grad_norm": 1.111774577991971,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 20580
+    },
+    {
+      "epoch": 0.20581,
+      "grad_norm": 1.4511201831757192,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 20581
+    },
+    {
+      "epoch": 0.20582,
+      "grad_norm": 1.3217257597406564,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 20582
+    },
+    {
+      "epoch": 0.20583,
+      "grad_norm": 1.2865553777542038,
+      "learning_rate": 0.003,
+      "loss": 3.9805,
+      "step": 20583
+    },
+    {
+      "epoch": 0.20584,
+      "grad_norm": 1.2712595787222918,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 20584
+    },
+    {
+      "epoch": 0.20585,
+      "grad_norm": 1.2903223416773595,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 20585
+    },
+    {
+      "epoch": 0.20586,
+      "grad_norm": 1.2855548438355584,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 20586
+    },
+    {
+      "epoch": 0.20587,
+      "grad_norm": 1.2206579233496302,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 20587
+    },
+    {
+      "epoch": 0.20588,
+      "grad_norm": 1.1559486033155753,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 20588
+    },
+    {
+      "epoch": 0.20589,
+      "grad_norm": 1.3270067686982983,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 20589
+    },
+    {
+      "epoch": 0.2059,
+      "grad_norm": 1.2063978658164853,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 20590
+    },
+    {
+      "epoch": 0.20591,
+      "grad_norm": 1.2805182089034122,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 20591
+    },
+    {
+      "epoch": 0.20592,
+      "grad_norm": 1.2809423865160992,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 20592
+    },
+    {
+      "epoch": 0.20593,
+      "grad_norm": 1.3974586842002943,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 20593
+    },
+    {
+      "epoch": 0.20594,
+      "grad_norm": 1.0942478018284425,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 20594
+    },
+    {
+      "epoch": 0.20595,
+      "grad_norm": 1.2747137112779623,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 20595
+    },
+    {
+      "epoch": 0.20596,
+      "grad_norm": 1.3819452643269272,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 20596
+    },
+    {
+      "epoch": 0.20597,
+      "grad_norm": 1.3211829669543382,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 20597
+    },
+    {
+      "epoch": 0.20598,
+      "grad_norm": 1.178660124641291,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 20598
+    },
+    {
+      "epoch": 0.20599,
+      "grad_norm": 1.3644000461117907,
+      "learning_rate": 0.003,
+      "loss": 4.0553,
+      "step": 20599
+    },
+    {
+      "epoch": 0.206,
+      "grad_norm": 1.217466801731437,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 20600
+    },
+    {
+      "epoch": 0.20601,
+      "grad_norm": 1.1454527330303983,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 20601
+    },
+    {
+      "epoch": 0.20602,
+      "grad_norm": 1.3190660349181011,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 20602
+    },
+    {
+      "epoch": 0.20603,
+      "grad_norm": 1.2555997363614386,
+      "learning_rate": 0.003,
+      "loss": 3.9864,
+      "step": 20603
+    },
+    {
+      "epoch": 0.20604,
+      "grad_norm": 1.3992701868999888,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 20604
+    },
+    {
+      "epoch": 0.20605,
+      "grad_norm": 1.4976837652852493,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 20605
+    },
+    {
+      "epoch": 0.20606,
+      "grad_norm": 1.1865323786360482,
+      "learning_rate": 0.003,
+      "loss": 3.9773,
+      "step": 20606
+    },
+    {
+      "epoch": 0.20607,
+      "grad_norm": 1.2809142822904975,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 20607
+    },
+    {
+      "epoch": 0.20608,
+      "grad_norm": 1.3504397720490322,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 20608
+    },
+    {
+      "epoch": 0.20609,
+      "grad_norm": 1.0158339370513485,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 20609
+    },
+    {
+      "epoch": 0.2061,
+      "grad_norm": 1.5301956197044464,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 20610
+    },
+    {
+      "epoch": 0.20611,
+      "grad_norm": 1.1755431494787218,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 20611
+    },
+    {
+      "epoch": 0.20612,
+      "grad_norm": 1.5389533869565877,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 20612
+    },
+    {
+      "epoch": 0.20613,
+      "grad_norm": 1.0744611483737596,
+      "learning_rate": 0.003,
+      "loss": 3.9787,
+      "step": 20613
+    },
+    {
+      "epoch": 0.20614,
+      "grad_norm": 1.40708776055189,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 20614
+    },
+    {
+      "epoch": 0.20615,
+      "grad_norm": 1.3747869052840511,
+      "learning_rate": 0.003,
+      "loss": 3.9758,
+      "step": 20615
+    },
+    {
+      "epoch": 0.20616,
+      "grad_norm": 1.4331851062065355,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 20616
+    },
+    {
+      "epoch": 0.20617,
+      "grad_norm": 1.3407159770644455,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 20617
+    },
+    {
+      "epoch": 0.20618,
+      "grad_norm": 1.2768955892238656,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 20618
+    },
+    {
+      "epoch": 0.20619,
+      "grad_norm": 1.3080299761105072,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 20619
+    },
+    {
+      "epoch": 0.2062,
+      "grad_norm": 1.2865388103203717,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 20620
+    },
+    {
+      "epoch": 0.20621,
+      "grad_norm": 1.3363652510846296,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 20621
+    },
+    {
+      "epoch": 0.20622,
+      "grad_norm": 1.1170030540815248,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 20622
+    },
+    {
+      "epoch": 0.20623,
+      "grad_norm": 1.2683880378078294,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 20623
+    },
+    {
+      "epoch": 0.20624,
+      "grad_norm": 1.212849363064571,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 20624
+    },
+    {
+      "epoch": 0.20625,
+      "grad_norm": 1.356058213868984,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 20625
+    },
+    {
+      "epoch": 0.20626,
+      "grad_norm": 1.3459499081576194,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 20626
+    },
+    {
+      "epoch": 0.20627,
+      "grad_norm": 1.4174675996189183,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 20627
+    },
+    {
+      "epoch": 0.20628,
+      "grad_norm": 1.176828583358703,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 20628
+    },
+    {
+      "epoch": 0.20629,
+      "grad_norm": 1.2503714517860733,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 20629
+    },
+    {
+      "epoch": 0.2063,
+      "grad_norm": 1.1528020942216353,
+      "learning_rate": 0.003,
+      "loss": 3.9774,
+      "step": 20630
+    },
+    {
+      "epoch": 0.20631,
+      "grad_norm": 1.5248490762349984,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 20631
+    },
+    {
+      "epoch": 0.20632,
+      "grad_norm": 1.295593891273598,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 20632
+    },
+    {
+      "epoch": 0.20633,
+      "grad_norm": 1.262912319565957,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 20633
+    },
+    {
+      "epoch": 0.20634,
+      "grad_norm": 1.3047062287083504,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 20634
+    },
+    {
+      "epoch": 0.20635,
+      "grad_norm": 1.0937140196112127,
+      "learning_rate": 0.003,
+      "loss": 3.9863,
+      "step": 20635
+    },
+    {
+      "epoch": 0.20636,
+      "grad_norm": 1.3655807850177033,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 20636
+    },
+    {
+      "epoch": 0.20637,
+      "grad_norm": 1.2649651522358472,
+      "learning_rate": 0.003,
+      "loss": 3.9867,
+      "step": 20637
+    },
+    {
+      "epoch": 0.20638,
+      "grad_norm": 1.4727599306393149,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 20638
+    },
+    {
+      "epoch": 0.20639,
+      "grad_norm": 1.1188449807535996,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 20639
+    },
+    {
+      "epoch": 0.2064,
+      "grad_norm": 1.517193017130125,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 20640
+    },
+    {
+      "epoch": 0.20641,
+      "grad_norm": 1.1615846079181251,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 20641
+    },
+    {
+      "epoch": 0.20642,
+      "grad_norm": 1.331983615865443,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 20642
+    },
+    {
+      "epoch": 0.20643,
+      "grad_norm": 1.1339331664069554,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 20643
+    },
+    {
+      "epoch": 0.20644,
+      "grad_norm": 1.523861615831484,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 20644
+    },
+    {
+      "epoch": 0.20645,
+      "grad_norm": 1.0967301726398142,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 20645
+    },
+    {
+      "epoch": 0.20646,
+      "grad_norm": 1.2546083406177109,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 20646
+    },
+    {
+      "epoch": 0.20647,
+      "grad_norm": 1.2914190996289006,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 20647
+    },
+    {
+      "epoch": 0.20648,
+      "grad_norm": 1.2202489559769463,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 20648
+    },
+    {
+      "epoch": 0.20649,
+      "grad_norm": 1.2446350548788678,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 20649
+    },
+    {
+      "epoch": 0.2065,
+      "grad_norm": 1.060336173915405,
+      "learning_rate": 0.003,
+      "loss": 3.9731,
+      "step": 20650
+    },
+    {
+      "epoch": 0.20651,
+      "grad_norm": 1.4534817567096714,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 20651
+    },
+    {
+      "epoch": 0.20652,
+      "grad_norm": 1.6169219171799551,
+      "learning_rate": 0.003,
+      "loss": 3.9662,
+      "step": 20652
+    },
+    {
+      "epoch": 0.20653,
+      "grad_norm": 1.425633538149865,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 20653
+    },
+    {
+      "epoch": 0.20654,
+      "grad_norm": 1.3653587165092793,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 20654
+    },
+    {
+      "epoch": 0.20655,
+      "grad_norm": 1.1013599802631413,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 20655
+    },
+    {
+      "epoch": 0.20656,
+      "grad_norm": 1.389759493828228,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 20656
+    },
+    {
+      "epoch": 0.20657,
+      "grad_norm": 1.1118818231305716,
+      "learning_rate": 0.003,
+      "loss": 4.0456,
+      "step": 20657
+    },
+    {
+      "epoch": 0.20658,
+      "grad_norm": 1.1976859327513487,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 20658
+    },
+    {
+      "epoch": 0.20659,
+      "grad_norm": 1.6270206809984922,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 20659
+    },
+    {
+      "epoch": 0.2066,
+      "grad_norm": 1.3083284960876418,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 20660
+    },
+    {
+      "epoch": 0.20661,
+      "grad_norm": 1.2424107371931825,
+      "learning_rate": 0.003,
+      "loss": 4.0364,
+      "step": 20661
+    },
+    {
+      "epoch": 0.20662,
+      "grad_norm": 1.3452500939675385,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 20662
+    },
+    {
+      "epoch": 0.20663,
+      "grad_norm": 1.5353196441580816,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 20663
+    },
+    {
+      "epoch": 0.20664,
+      "grad_norm": 1.1350995906876258,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 20664
+    },
+    {
+      "epoch": 0.20665,
+      "grad_norm": 1.376225639854085,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 20665
+    },
+    {
+      "epoch": 0.20666,
+      "grad_norm": 1.2415491933836773,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 20666
+    },
+    {
+      "epoch": 0.20667,
+      "grad_norm": 1.1787054133313857,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 20667
+    },
+    {
+      "epoch": 0.20668,
+      "grad_norm": 1.362267845670038,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 20668
+    },
+    {
+      "epoch": 0.20669,
+      "grad_norm": 1.0902815479880927,
+      "learning_rate": 0.003,
+      "loss": 3.9717,
+      "step": 20669
+    },
+    {
+      "epoch": 0.2067,
+      "grad_norm": 1.3816746211237632,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 20670
+    },
+    {
+      "epoch": 0.20671,
+      "grad_norm": 1.0922986068393714,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 20671
+    },
+    {
+      "epoch": 0.20672,
+      "grad_norm": 1.428276902598254,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 20672
+    },
+    {
+      "epoch": 0.20673,
+      "grad_norm": 1.233053263102435,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 20673
+    },
+    {
+      "epoch": 0.20674,
+      "grad_norm": 1.4063505898997375,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 20674
+    },
+    {
+      "epoch": 0.20675,
+      "grad_norm": 1.3812546537852057,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 20675
+    },
+    {
+      "epoch": 0.20676,
+      "grad_norm": 1.21906759164386,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 20676
+    },
+    {
+      "epoch": 0.20677,
+      "grad_norm": 1.2977669940591485,
+      "learning_rate": 0.003,
+      "loss": 3.975,
+      "step": 20677
+    },
+    {
+      "epoch": 0.20678,
+      "grad_norm": 1.1407187398419785,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 20678
+    },
+    {
+      "epoch": 0.20679,
+      "grad_norm": 1.4003206700491742,
+      "learning_rate": 0.003,
+      "loss": 4.0388,
+      "step": 20679
+    },
+    {
+      "epoch": 0.2068,
+      "grad_norm": 1.185002888761693,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 20680
+    },
+    {
+      "epoch": 0.20681,
+      "grad_norm": 1.4307246065912882,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 20681
+    },
+    {
+      "epoch": 0.20682,
+      "grad_norm": 1.0988166617689916,
+      "learning_rate": 0.003,
+      "loss": 3.9651,
+      "step": 20682
+    },
+    {
+      "epoch": 0.20683,
+      "grad_norm": 1.3493442609773985,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 20683
+    },
+    {
+      "epoch": 0.20684,
+      "grad_norm": 1.174629793616725,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 20684
+    },
+    {
+      "epoch": 0.20685,
+      "grad_norm": 1.2722670210540725,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 20685
+    },
+    {
+      "epoch": 0.20686,
+      "grad_norm": 1.3432316722671938,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 20686
+    },
+    {
+      "epoch": 0.20687,
+      "grad_norm": 1.4251511683598665,
+      "learning_rate": 0.003,
+      "loss": 3.9779,
+      "step": 20687
+    },
+    {
+      "epoch": 0.20688,
+      "grad_norm": 1.2702409179527885,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 20688
+    },
+    {
+      "epoch": 0.20689,
+      "grad_norm": 1.3540547528204405,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 20689
+    },
+    {
+      "epoch": 0.2069,
+      "grad_norm": 1.2373425670072522,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 20690
+    },
+    {
+      "epoch": 0.20691,
+      "grad_norm": 1.3664240434372155,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 20691
+    },
+    {
+      "epoch": 0.20692,
+      "grad_norm": 1.191253007054927,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 20692
+    },
+    {
+      "epoch": 0.20693,
+      "grad_norm": 1.374403158257714,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 20693
+    },
+    {
+      "epoch": 0.20694,
+      "grad_norm": 1.1771697436083528,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 20694
+    },
+    {
+      "epoch": 0.20695,
+      "grad_norm": 1.48097177425113,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 20695
+    },
+    {
+      "epoch": 0.20696,
+      "grad_norm": 1.0130894722928716,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 20696
+    },
+    {
+      "epoch": 0.20697,
+      "grad_norm": 1.5468434394501385,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 20697
+    },
+    {
+      "epoch": 0.20698,
+      "grad_norm": 1.2870689064812881,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 20698
+    },
+    {
+      "epoch": 0.20699,
+      "grad_norm": 1.5312212457141665,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 20699
+    },
+    {
+      "epoch": 0.207,
+      "grad_norm": 1.392568501537993,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 20700
+    },
+    {
+      "epoch": 0.20701,
+      "grad_norm": 1.1403734007521096,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 20701
+    },
+    {
+      "epoch": 0.20702,
+      "grad_norm": 1.194152404792695,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 20702
+    },
+    {
+      "epoch": 0.20703,
+      "grad_norm": 1.3632185855689758,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 20703
+    },
+    {
+      "epoch": 0.20704,
+      "grad_norm": 1.3918710485699248,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 20704
+    },
+    {
+      "epoch": 0.20705,
+      "grad_norm": 1.2117858382962787,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 20705
+    },
+    {
+      "epoch": 0.20706,
+      "grad_norm": 1.4534391333265622,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 20706
+    },
+    {
+      "epoch": 0.20707,
+      "grad_norm": 1.2311747956897248,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 20707
+    },
+    {
+      "epoch": 0.20708,
+      "grad_norm": 1.154244274853029,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 20708
+    },
+    {
+      "epoch": 0.20709,
+      "grad_norm": 1.2588712245534515,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 20709
+    },
+    {
+      "epoch": 0.2071,
+      "grad_norm": 1.1199682038217924,
+      "learning_rate": 0.003,
+      "loss": 3.9839,
+      "step": 20710
+    },
+    {
+      "epoch": 0.20711,
+      "grad_norm": 1.242815824650162,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 20711
+    },
+    {
+      "epoch": 0.20712,
+      "grad_norm": 1.199687443416815,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 20712
+    },
+    {
+      "epoch": 0.20713,
+      "grad_norm": 1.4011147370295212,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 20713
+    },
+    {
+      "epoch": 0.20714,
+      "grad_norm": 1.3889339867150936,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 20714
+    },
+    {
+      "epoch": 0.20715,
+      "grad_norm": 1.3791162445570997,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 20715
+    },
+    {
+      "epoch": 0.20716,
+      "grad_norm": 1.3848688663249529,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 20716
+    },
+    {
+      "epoch": 0.20717,
+      "grad_norm": 1.0682848244643386,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 20717
+    },
+    {
+      "epoch": 0.20718,
+      "grad_norm": 1.4117279775919247,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 20718
+    },
+    {
+      "epoch": 0.20719,
+      "grad_norm": 1.0104646813563232,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 20719
+    },
+    {
+      "epoch": 0.2072,
+      "grad_norm": 1.3508238842242124,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 20720
+    },
+    {
+      "epoch": 0.20721,
+      "grad_norm": 1.3606526550845897,
+      "learning_rate": 0.003,
+      "loss": 4.0459,
+      "step": 20721
+    },
+    {
+      "epoch": 0.20722,
+      "grad_norm": 1.1153649551211486,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 20722
+    },
+    {
+      "epoch": 0.20723,
+      "grad_norm": 1.4087524912260174,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 20723
+    },
+    {
+      "epoch": 0.20724,
+      "grad_norm": 1.1634798503769788,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 20724
+    },
+    {
+      "epoch": 0.20725,
+      "grad_norm": 1.353255865520132,
+      "learning_rate": 0.003,
+      "loss": 4.0405,
+      "step": 20725
+    },
+    {
+      "epoch": 0.20726,
+      "grad_norm": 1.3053899490743734,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 20726
+    },
+    {
+      "epoch": 0.20727,
+      "grad_norm": 1.2761599390148615,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 20727
+    },
+    {
+      "epoch": 0.20728,
+      "grad_norm": 1.3535293874816996,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 20728
+    },
+    {
+      "epoch": 0.20729,
+      "grad_norm": 1.1160122844154825,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 20729
+    },
+    {
+      "epoch": 0.2073,
+      "grad_norm": 1.4603381559467412,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 20730
+    },
+    {
+      "epoch": 0.20731,
+      "grad_norm": 1.3619902186698736,
+      "learning_rate": 0.003,
+      "loss": 3.9886,
+      "step": 20731
+    },
+    {
+      "epoch": 0.20732,
+      "grad_norm": 1.370862438754088,
+      "learning_rate": 0.003,
+      "loss": 3.9647,
+      "step": 20732
+    },
+    {
+      "epoch": 0.20733,
+      "grad_norm": 1.1538581243827948,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 20733
+    },
+    {
+      "epoch": 0.20734,
+      "grad_norm": 1.2160670915743559,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 20734
+    },
+    {
+      "epoch": 0.20735,
+      "grad_norm": 1.3774866452867383,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 20735
+    },
+    {
+      "epoch": 0.20736,
+      "grad_norm": 1.1941753023916182,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 20736
+    },
+    {
+      "epoch": 0.20737,
+      "grad_norm": 1.5219723574569388,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 20737
+    },
+    {
+      "epoch": 0.20738,
+      "grad_norm": 1.1705040250652128,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 20738
+    },
+    {
+      "epoch": 0.20739,
+      "grad_norm": 1.3300273074518532,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 20739
+    },
+    {
+      "epoch": 0.2074,
+      "grad_norm": 1.3519116547813115,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 20740
+    },
+    {
+      "epoch": 0.20741,
+      "grad_norm": 1.2437283628637685,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 20741
+    },
+    {
+      "epoch": 0.20742,
+      "grad_norm": 1.2139133471208732,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 20742
+    },
+    {
+      "epoch": 0.20743,
+      "grad_norm": 1.011757550463894,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 20743
+    },
+    {
+      "epoch": 0.20744,
+      "grad_norm": 1.476849468515335,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 20744
+    },
+    {
+      "epoch": 0.20745,
+      "grad_norm": 1.124190433730722,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 20745
+    },
+    {
+      "epoch": 0.20746,
+      "grad_norm": 1.511443329096388,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 20746
+    },
+    {
+      "epoch": 0.20747,
+      "grad_norm": 0.9825076361804065,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 20747
+    },
+    {
+      "epoch": 0.20748,
+      "grad_norm": 1.5064621839673242,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 20748
+    },
+    {
+      "epoch": 0.20749,
+      "grad_norm": 1.2827956509312328,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 20749
+    },
+    {
+      "epoch": 0.2075,
+      "grad_norm": 1.3916254376191106,
+      "learning_rate": 0.003,
+      "loss": 4.0402,
+      "step": 20750
+    },
+    {
+      "epoch": 0.20751,
+      "grad_norm": 1.3100723021927003,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 20751
+    },
+    {
+      "epoch": 0.20752,
+      "grad_norm": 1.3098096513551125,
+      "learning_rate": 0.003,
+      "loss": 4.041,
+      "step": 20752
+    },
+    {
+      "epoch": 0.20753,
+      "grad_norm": 1.2926125070104577,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 20753
+    },
+    {
+      "epoch": 0.20754,
+      "grad_norm": 1.4281878678811832,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 20754
+    },
+    {
+      "epoch": 0.20755,
+      "grad_norm": 1.3968188177213192,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 20755
+    },
+    {
+      "epoch": 0.20756,
+      "grad_norm": 1.433140678814419,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 20756
+    },
+    {
+      "epoch": 0.20757,
+      "grad_norm": 1.3649186304567098,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 20757
+    },
+    {
+      "epoch": 0.20758,
+      "grad_norm": 1.0490527414403288,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 20758
+    },
+    {
+      "epoch": 0.20759,
+      "grad_norm": 1.2655298684550977,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 20759
+    },
+    {
+      "epoch": 0.2076,
+      "grad_norm": 1.0217947523084412,
+      "learning_rate": 0.003,
+      "loss": 3.9711,
+      "step": 20760
+    },
+    {
+      "epoch": 0.20761,
+      "grad_norm": 1.4718944547170667,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 20761
+    },
+    {
+      "epoch": 0.20762,
+      "grad_norm": 1.131937511658232,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 20762
+    },
+    {
+      "epoch": 0.20763,
+      "grad_norm": 1.4730515140899714,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 20763
+    },
+    {
+      "epoch": 0.20764,
+      "grad_norm": 1.3322254925719181,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 20764
+    },
+    {
+      "epoch": 0.20765,
+      "grad_norm": 1.2302313544070185,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 20765
+    },
+    {
+      "epoch": 0.20766,
+      "grad_norm": 1.577993326336582,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 20766
+    },
+    {
+      "epoch": 0.20767,
+      "grad_norm": 1.4112168266005138,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 20767
+    },
+    {
+      "epoch": 0.20768,
+      "grad_norm": 0.9286027845956305,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 20768
+    },
+    {
+      "epoch": 0.20769,
+      "grad_norm": 1.2672174109465204,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 20769
+    },
+    {
+      "epoch": 0.2077,
+      "grad_norm": 1.3292904225378337,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 20770
+    },
+    {
+      "epoch": 0.20771,
+      "grad_norm": 1.267402357750023,
+      "learning_rate": 0.003,
+      "loss": 3.9679,
+      "step": 20771
+    },
+    {
+      "epoch": 0.20772,
+      "grad_norm": 1.1606444074798226,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 20772
+    },
+    {
+      "epoch": 0.20773,
+      "grad_norm": 1.4224852426464072,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 20773
+    },
+    {
+      "epoch": 0.20774,
+      "grad_norm": 1.0794472944753835,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 20774
+    },
+    {
+      "epoch": 0.20775,
+      "grad_norm": 1.5700931108159009,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 20775
+    },
+    {
+      "epoch": 0.20776,
+      "grad_norm": 1.2117575695534015,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 20776
+    },
+    {
+      "epoch": 0.20777,
+      "grad_norm": 1.1635798890179556,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 20777
+    },
+    {
+      "epoch": 0.20778,
+      "grad_norm": 1.332541192592899,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 20778
+    },
+    {
+      "epoch": 0.20779,
+      "grad_norm": 1.07754798751962,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 20779
+    },
+    {
+      "epoch": 0.2078,
+      "grad_norm": 1.4150568316394432,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 20780
+    },
+    {
+      "epoch": 0.20781,
+      "grad_norm": 1.2569495751056659,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 20781
+    },
+    {
+      "epoch": 0.20782,
+      "grad_norm": 1.303935631429353,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 20782
+    },
+    {
+      "epoch": 0.20783,
+      "grad_norm": 1.070262931021732,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 20783
+    },
+    {
+      "epoch": 0.20784,
+      "grad_norm": 1.2463366521176107,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 20784
+    },
+    {
+      "epoch": 0.20785,
+      "grad_norm": 1.332859569457362,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 20785
+    },
+    {
+      "epoch": 0.20786,
+      "grad_norm": 1.555431904423007,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 20786
+    },
+    {
+      "epoch": 0.20787,
+      "grad_norm": 1.5210815629933276,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 20787
+    },
+    {
+      "epoch": 0.20788,
+      "grad_norm": 1.165457018427775,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 20788
+    },
+    {
+      "epoch": 0.20789,
+      "grad_norm": 1.4178219764680484,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 20789
+    },
+    {
+      "epoch": 0.2079,
+      "grad_norm": 1.1854969763823446,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 20790
+    },
+    {
+      "epoch": 0.20791,
+      "grad_norm": 1.3568456533168791,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 20791
+    },
+    {
+      "epoch": 0.20792,
+      "grad_norm": 1.078205671141796,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 20792
+    },
+    {
+      "epoch": 0.20793,
+      "grad_norm": 1.399454875613823,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 20793
+    },
+    {
+      "epoch": 0.20794,
+      "grad_norm": 1.047424292569248,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 20794
+    },
+    {
+      "epoch": 0.20795,
+      "grad_norm": 1.4960949989355845,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 20795
+    },
+    {
+      "epoch": 0.20796,
+      "grad_norm": 1.1708483719124951,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 20796
+    },
+    {
+      "epoch": 0.20797,
+      "grad_norm": 1.3591757273193632,
+      "learning_rate": 0.003,
+      "loss": 3.9679,
+      "step": 20797
+    },
+    {
+      "epoch": 0.20798,
+      "grad_norm": 1.1381751592273424,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 20798
+    },
+    {
+      "epoch": 0.20799,
+      "grad_norm": 1.3842694374877977,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 20799
+    },
+    {
+      "epoch": 0.208,
+      "grad_norm": 1.2717777939290427,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 20800
+    },
+    {
+      "epoch": 0.20801,
+      "grad_norm": 1.673543247494367,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 20801
+    },
+    {
+      "epoch": 0.20802,
+      "grad_norm": 1.1391768832265765,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 20802
+    },
+    {
+      "epoch": 0.20803,
+      "grad_norm": 1.3249789689990805,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 20803
+    },
+    {
+      "epoch": 0.20804,
+      "grad_norm": 1.1419414901724636,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 20804
+    },
+    {
+      "epoch": 0.20805,
+      "grad_norm": 1.3004736578234641,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 20805
+    },
+    {
+      "epoch": 0.20806,
+      "grad_norm": 1.0913698211834306,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 20806
+    },
+    {
+      "epoch": 0.20807,
+      "grad_norm": 1.631126860591073,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 20807
+    },
+    {
+      "epoch": 0.20808,
+      "grad_norm": 1.0782997477311531,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 20808
+    },
+    {
+      "epoch": 0.20809,
+      "grad_norm": 1.4599194338314092,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 20809
+    },
+    {
+      "epoch": 0.2081,
+      "grad_norm": 1.3475472121598244,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 20810
+    },
+    {
+      "epoch": 0.20811,
+      "grad_norm": 1.391921105172872,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 20811
+    },
+    {
+      "epoch": 0.20812,
+      "grad_norm": 1.1615546155092566,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 20812
+    },
+    {
+      "epoch": 0.20813,
+      "grad_norm": 1.470093653345452,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 20813
+    },
+    {
+      "epoch": 0.20814,
+      "grad_norm": 1.1524547889534358,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 20814
+    },
+    {
+      "epoch": 0.20815,
+      "grad_norm": 1.3009815958092343,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 20815
+    },
+    {
+      "epoch": 0.20816,
+      "grad_norm": 1.0347613315400521,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 20816
+    },
+    {
+      "epoch": 0.20817,
+      "grad_norm": 1.2376279673360198,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 20817
+    },
+    {
+      "epoch": 0.20818,
+      "grad_norm": 1.43748282615632,
+      "learning_rate": 0.003,
+      "loss": 4.0442,
+      "step": 20818
+    },
+    {
+      "epoch": 0.20819,
+      "grad_norm": 1.3146545910267338,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 20819
+    },
+    {
+      "epoch": 0.2082,
+      "grad_norm": 1.3140342819154898,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 20820
+    },
+    {
+      "epoch": 0.20821,
+      "grad_norm": 1.1283767018277284,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 20821
+    },
+    {
+      "epoch": 0.20822,
+      "grad_norm": 1.2414655674025967,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 20822
+    },
+    {
+      "epoch": 0.20823,
+      "grad_norm": 1.4824330805251629,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 20823
+    },
+    {
+      "epoch": 0.20824,
+      "grad_norm": 1.1643365672873889,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 20824
+    },
+    {
+      "epoch": 0.20825,
+      "grad_norm": 1.2843731391744342,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 20825
+    },
+    {
+      "epoch": 0.20826,
+      "grad_norm": 1.13954319317425,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 20826
+    },
+    {
+      "epoch": 0.20827,
+      "grad_norm": 1.3462498957908857,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 20827
+    },
+    {
+      "epoch": 0.20828,
+      "grad_norm": 1.1197594007395637,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 20828
+    },
+    {
+      "epoch": 0.20829,
+      "grad_norm": 1.508424835574956,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 20829
+    },
+    {
+      "epoch": 0.2083,
+      "grad_norm": 1.1426482207964674,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 20830
+    },
+    {
+      "epoch": 0.20831,
+      "grad_norm": 1.501334615124463,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 20831
+    },
+    {
+      "epoch": 0.20832,
+      "grad_norm": 1.1407177606222458,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 20832
+    },
+    {
+      "epoch": 0.20833,
+      "grad_norm": 1.6613078855726637,
+      "learning_rate": 0.003,
+      "loss": 4.053,
+      "step": 20833
+    },
+    {
+      "epoch": 0.20834,
+      "grad_norm": 1.0033287750508755,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 20834
+    },
+    {
+      "epoch": 0.20835,
+      "grad_norm": 1.4374401538502266,
+      "learning_rate": 0.003,
+      "loss": 4.039,
+      "step": 20835
+    },
+    {
+      "epoch": 0.20836,
+      "grad_norm": 1.0935475297295707,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 20836
+    },
+    {
+      "epoch": 0.20837,
+      "grad_norm": 1.4131584994295754,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 20837
+    },
+    {
+      "epoch": 0.20838,
+      "grad_norm": 1.2294335094649325,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 20838
+    },
+    {
+      "epoch": 0.20839,
+      "grad_norm": 1.472671249281069,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 20839
+    },
+    {
+      "epoch": 0.2084,
+      "grad_norm": 1.1940720000074654,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 20840
+    },
+    {
+      "epoch": 0.20841,
+      "grad_norm": 1.5240340417727574,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 20841
+    },
+    {
+      "epoch": 0.20842,
+      "grad_norm": 1.0576897803130114,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 20842
+    },
+    {
+      "epoch": 0.20843,
+      "grad_norm": 1.399109973232184,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 20843
+    },
+    {
+      "epoch": 0.20844,
+      "grad_norm": 1.201599226730298,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 20844
+    },
+    {
+      "epoch": 0.20845,
+      "grad_norm": 1.4341061355773497,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 20845
+    },
+    {
+      "epoch": 0.20846,
+      "grad_norm": 1.3792120707292328,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 20846
+    },
+    {
+      "epoch": 0.20847,
+      "grad_norm": 1.3301536053199357,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 20847
+    },
+    {
+      "epoch": 0.20848,
+      "grad_norm": 1.4318432994732744,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 20848
+    },
+    {
+      "epoch": 0.20849,
+      "grad_norm": 1.1220748806678396,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 20849
+    },
+    {
+      "epoch": 0.2085,
+      "grad_norm": 1.5563610202056835,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 20850
+    },
+    {
+      "epoch": 0.20851,
+      "grad_norm": 1.1916118048537319,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 20851
+    },
+    {
+      "epoch": 0.20852,
+      "grad_norm": 1.3917243433589763,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 20852
+    },
+    {
+      "epoch": 0.20853,
+      "grad_norm": 0.9829251420864882,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 20853
+    },
+    {
+      "epoch": 0.20854,
+      "grad_norm": 1.484250504274921,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 20854
+    },
+    {
+      "epoch": 0.20855,
+      "grad_norm": 1.2513668225153176,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 20855
+    },
+    {
+      "epoch": 0.20856,
+      "grad_norm": 1.4523932295277415,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 20856
+    },
+    {
+      "epoch": 0.20857,
+      "grad_norm": 1.2910304787177638,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 20857
+    },
+    {
+      "epoch": 0.20858,
+      "grad_norm": 1.1418392651256097,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 20858
+    },
+    {
+      "epoch": 0.20859,
+      "grad_norm": 1.2668562927176923,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 20859
+    },
+    {
+      "epoch": 0.2086,
+      "grad_norm": 1.4214567798945295,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 20860
+    },
+    {
+      "epoch": 0.20861,
+      "grad_norm": 1.1480111389004846,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 20861
+    },
+    {
+      "epoch": 0.20862,
+      "grad_norm": 1.2274962404976055,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 20862
+    },
+    {
+      "epoch": 0.20863,
+      "grad_norm": 1.27374838045122,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 20863
+    },
+    {
+      "epoch": 0.20864,
+      "grad_norm": 1.2200686497382318,
+      "learning_rate": 0.003,
+      "loss": 3.9821,
+      "step": 20864
+    },
+    {
+      "epoch": 0.20865,
+      "grad_norm": 1.2442953387451878,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 20865
+    },
+    {
+      "epoch": 0.20866,
+      "grad_norm": 1.1650642613840394,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 20866
+    },
+    {
+      "epoch": 0.20867,
+      "grad_norm": 1.5364798780309676,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 20867
+    },
+    {
+      "epoch": 0.20868,
+      "grad_norm": 1.2471734611742733,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 20868
+    },
+    {
+      "epoch": 0.20869,
+      "grad_norm": 1.3291685662832533,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 20869
+    },
+    {
+      "epoch": 0.2087,
+      "grad_norm": 1.3411980481798331,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 20870
+    },
+    {
+      "epoch": 0.20871,
+      "grad_norm": 1.3715362672953424,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 20871
+    },
+    {
+      "epoch": 0.20872,
+      "grad_norm": 0.9983099319581852,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 20872
+    },
+    {
+      "epoch": 0.20873,
+      "grad_norm": 1.2465703888512631,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 20873
+    },
+    {
+      "epoch": 0.20874,
+      "grad_norm": 1.268492851746666,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 20874
+    },
+    {
+      "epoch": 0.20875,
+      "grad_norm": 1.3237560560328707,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 20875
+    },
+    {
+      "epoch": 0.20876,
+      "grad_norm": 1.212713327915906,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 20876
+    },
+    {
+      "epoch": 0.20877,
+      "grad_norm": 1.35991248945412,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 20877
+    },
+    {
+      "epoch": 0.20878,
+      "grad_norm": 1.2941770872136193,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 20878
+    },
+    {
+      "epoch": 0.20879,
+      "grad_norm": 1.349236628104444,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 20879
+    },
+    {
+      "epoch": 0.2088,
+      "grad_norm": 1.111820171868114,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 20880
+    },
+    {
+      "epoch": 0.20881,
+      "grad_norm": 1.417026905232516,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 20881
+    },
+    {
+      "epoch": 0.20882,
+      "grad_norm": 1.480137036015666,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 20882
+    },
+    {
+      "epoch": 0.20883,
+      "grad_norm": 1.2642757657636645,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 20883
+    },
+    {
+      "epoch": 0.20884,
+      "grad_norm": 1.3747794452666113,
+      "learning_rate": 0.003,
+      "loss": 4.0428,
+      "step": 20884
+    },
+    {
+      "epoch": 0.20885,
+      "grad_norm": 1.2437901348416465,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 20885
+    },
+    {
+      "epoch": 0.20886,
+      "grad_norm": 1.2896036091080418,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 20886
+    },
+    {
+      "epoch": 0.20887,
+      "grad_norm": 1.1710095132315907,
+      "learning_rate": 0.003,
+      "loss": 4.0446,
+      "step": 20887
+    },
+    {
+      "epoch": 0.20888,
+      "grad_norm": 1.435226503891415,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 20888
+    },
+    {
+      "epoch": 0.20889,
+      "grad_norm": 1.086233771944457,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 20889
+    },
+    {
+      "epoch": 0.2089,
+      "grad_norm": 1.257460661003727,
+      "learning_rate": 0.003,
+      "loss": 3.9823,
+      "step": 20890
+    },
+    {
+      "epoch": 0.20891,
+      "grad_norm": 1.2099953935582919,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 20891
+    },
+    {
+      "epoch": 0.20892,
+      "grad_norm": 1.2353827481489195,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 20892
+    },
+    {
+      "epoch": 0.20893,
+      "grad_norm": 1.3587299496919547,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 20893
+    },
+    {
+      "epoch": 0.20894,
+      "grad_norm": 1.3272217581387291,
+      "learning_rate": 0.003,
+      "loss": 3.9676,
+      "step": 20894
+    },
+    {
+      "epoch": 0.20895,
+      "grad_norm": 1.0978762697383098,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 20895
+    },
+    {
+      "epoch": 0.20896,
+      "grad_norm": 1.1916421289786143,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 20896
+    },
+    {
+      "epoch": 0.20897,
+      "grad_norm": 1.122132726075262,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 20897
+    },
+    {
+      "epoch": 0.20898,
+      "grad_norm": 1.6292753930404273,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 20898
+    },
+    {
+      "epoch": 0.20899,
+      "grad_norm": 1.208907119628758,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 20899
+    },
+    {
+      "epoch": 0.209,
+      "grad_norm": 1.636360595449662,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 20900
+    },
+    {
+      "epoch": 0.20901,
+      "grad_norm": 1.134323633843346,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 20901
+    },
+    {
+      "epoch": 0.20902,
+      "grad_norm": 1.190116579787095,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 20902
+    },
+    {
+      "epoch": 0.20903,
+      "grad_norm": 1.4453255374232301,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 20903
+    },
+    {
+      "epoch": 0.20904,
+      "grad_norm": 1.3558102337820357,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 20904
+    },
+    {
+      "epoch": 0.20905,
+      "grad_norm": 1.350475312101927,
+      "learning_rate": 0.003,
+      "loss": 4.051,
+      "step": 20905
+    },
+    {
+      "epoch": 0.20906,
+      "grad_norm": 1.3242363143984324,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 20906
+    },
+    {
+      "epoch": 0.20907,
+      "grad_norm": 1.2990795278588714,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 20907
+    },
+    {
+      "epoch": 0.20908,
+      "grad_norm": 1.0584069261196563,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 20908
+    },
+    {
+      "epoch": 0.20909,
+      "grad_norm": 1.5779688926743314,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 20909
+    },
+    {
+      "epoch": 0.2091,
+      "grad_norm": 0.968561392808828,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 20910
+    },
+    {
+      "epoch": 0.20911,
+      "grad_norm": 1.4869033307180153,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 20911
+    },
+    {
+      "epoch": 0.20912,
+      "grad_norm": 1.2472397489966522,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 20912
+    },
+    {
+      "epoch": 0.20913,
+      "grad_norm": 1.1943919775368714,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 20913
+    },
+    {
+      "epoch": 0.20914,
+      "grad_norm": 1.3110083589780148,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 20914
+    },
+    {
+      "epoch": 0.20915,
+      "grad_norm": 1.1125060135367526,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 20915
+    },
+    {
+      "epoch": 0.20916,
+      "grad_norm": 1.692721948610048,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 20916
+    },
+    {
+      "epoch": 0.20917,
+      "grad_norm": 1.0080651902147466,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 20917
+    },
+    {
+      "epoch": 0.20918,
+      "grad_norm": 1.6541579730934663,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 20918
+    },
+    {
+      "epoch": 0.20919,
+      "grad_norm": 1.1882879206737498,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 20919
+    },
+    {
+      "epoch": 0.2092,
+      "grad_norm": 1.1928966350989192,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 20920
+    },
+    {
+      "epoch": 0.20921,
+      "grad_norm": 1.3281674142026325,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 20921
+    },
+    {
+      "epoch": 0.20922,
+      "grad_norm": 1.1281466809423264,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 20922
+    },
+    {
+      "epoch": 0.20923,
+      "grad_norm": 1.405317684930867,
+      "learning_rate": 0.003,
+      "loss": 4.0489,
+      "step": 20923
+    },
+    {
+      "epoch": 0.20924,
+      "grad_norm": 1.2996071800213322,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 20924
+    },
+    {
+      "epoch": 0.20925,
+      "grad_norm": 1.4798297342438993,
+      "learning_rate": 0.003,
+      "loss": 4.0359,
+      "step": 20925
+    },
+    {
+      "epoch": 0.20926,
+      "grad_norm": 0.9910328299841175,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 20926
+    },
+    {
+      "epoch": 0.20927,
+      "grad_norm": 1.5054054101988479,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 20927
+    },
+    {
+      "epoch": 0.20928,
+      "grad_norm": 1.2440736855379702,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 20928
+    },
+    {
+      "epoch": 0.20929,
+      "grad_norm": 1.3046514138811083,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 20929
+    },
+    {
+      "epoch": 0.2093,
+      "grad_norm": 1.4542832610800758,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 20930
+    },
+    {
+      "epoch": 0.20931,
+      "grad_norm": 1.0506587656577084,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 20931
+    },
+    {
+      "epoch": 0.20932,
+      "grad_norm": 1.485776450071703,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 20932
+    },
+    {
+      "epoch": 0.20933,
+      "grad_norm": 1.1129187452645215,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 20933
+    },
+    {
+      "epoch": 0.20934,
+      "grad_norm": 1.4503475196015443,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 20934
+    },
+    {
+      "epoch": 0.20935,
+      "grad_norm": 1.2543700385634864,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 20935
+    },
+    {
+      "epoch": 0.20936,
+      "grad_norm": 1.4533892287731454,
+      "learning_rate": 0.003,
+      "loss": 3.9788,
+      "step": 20936
+    },
+    {
+      "epoch": 0.20937,
+      "grad_norm": 1.1414007351312137,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 20937
+    },
+    {
+      "epoch": 0.20938,
+      "grad_norm": 1.251294910318001,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 20938
+    },
+    {
+      "epoch": 0.20939,
+      "grad_norm": 1.1816910191914949,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 20939
+    },
+    {
+      "epoch": 0.2094,
+      "grad_norm": 1.345794539806514,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 20940
+    },
+    {
+      "epoch": 0.20941,
+      "grad_norm": 1.1287102077071347,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 20941
+    },
+    {
+      "epoch": 0.20942,
+      "grad_norm": 1.7059360765530418,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 20942
+    },
+    {
+      "epoch": 0.20943,
+      "grad_norm": 0.9725796875070399,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 20943
+    },
+    {
+      "epoch": 0.20944,
+      "grad_norm": 1.4227583733274918,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 20944
+    },
+    {
+      "epoch": 0.20945,
+      "grad_norm": 1.3462901465800026,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 20945
+    },
+    {
+      "epoch": 0.20946,
+      "grad_norm": 1.3232371071169384,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 20946
+    },
+    {
+      "epoch": 0.20947,
+      "grad_norm": 1.2248052701779697,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 20947
+    },
+    {
+      "epoch": 0.20948,
+      "grad_norm": 1.4871470534289315,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 20948
+    },
+    {
+      "epoch": 0.20949,
+      "grad_norm": 1.1315933153070257,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 20949
+    },
+    {
+      "epoch": 0.2095,
+      "grad_norm": 1.1270368023326598,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 20950
+    },
+    {
+      "epoch": 0.20951,
+      "grad_norm": 1.1241772227392153,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 20951
+    },
+    {
+      "epoch": 0.20952,
+      "grad_norm": 1.4724044363304891,
+      "learning_rate": 0.003,
+      "loss": 4.0463,
+      "step": 20952
+    },
+    {
+      "epoch": 0.20953,
+      "grad_norm": 0.9499607561075719,
+      "learning_rate": 0.003,
+      "loss": 3.9764,
+      "step": 20953
+    },
+    {
+      "epoch": 0.20954,
+      "grad_norm": 1.4605437266261925,
+      "learning_rate": 0.003,
+      "loss": 3.9795,
+      "step": 20954
+    },
+    {
+      "epoch": 0.20955,
+      "grad_norm": 1.1489102623569274,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 20955
+    },
+    {
+      "epoch": 0.20956,
+      "grad_norm": 1.5273005662502421,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 20956
+    },
+    {
+      "epoch": 0.20957,
+      "grad_norm": 1.0026624263343034,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 20957
+    },
+    {
+      "epoch": 0.20958,
+      "grad_norm": 1.3073050152222774,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 20958
+    },
+    {
+      "epoch": 0.20959,
+      "grad_norm": 1.3324447364034024,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 20959
+    },
+    {
+      "epoch": 0.2096,
+      "grad_norm": 1.2337033940702584,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 20960
+    },
+    {
+      "epoch": 0.20961,
+      "grad_norm": 1.3637803086304312,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 20961
+    },
+    {
+      "epoch": 0.20962,
+      "grad_norm": 1.2146625047395507,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 20962
+    },
+    {
+      "epoch": 0.20963,
+      "grad_norm": 1.3748678345887644,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 20963
+    },
+    {
+      "epoch": 0.20964,
+      "grad_norm": 1.0949866482552657,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 20964
+    },
+    {
+      "epoch": 0.20965,
+      "grad_norm": 1.270735005537506,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 20965
+    },
+    {
+      "epoch": 0.20966,
+      "grad_norm": 1.373823448387118,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 20966
+    },
+    {
+      "epoch": 0.20967,
+      "grad_norm": 1.054114676775662,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 20967
+    },
+    {
+      "epoch": 0.20968,
+      "grad_norm": 1.4429330087355257,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 20968
+    },
+    {
+      "epoch": 0.20969,
+      "grad_norm": 1.0481154144518454,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 20969
+    },
+    {
+      "epoch": 0.2097,
+      "grad_norm": 1.2741545221380652,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 20970
+    },
+    {
+      "epoch": 0.20971,
+      "grad_norm": 1.3559480875835053,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 20971
+    },
+    {
+      "epoch": 0.20972,
+      "grad_norm": 1.3707470670490596,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 20972
+    },
+    {
+      "epoch": 0.20973,
+      "grad_norm": 1.269143570049498,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 20973
+    },
+    {
+      "epoch": 0.20974,
+      "grad_norm": 1.471821638408851,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 20974
+    },
+    {
+      "epoch": 0.20975,
+      "grad_norm": 1.1597546030173398,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 20975
+    },
+    {
+      "epoch": 0.20976,
+      "grad_norm": 1.2716315390796005,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 20976
+    },
+    {
+      "epoch": 0.20977,
+      "grad_norm": 1.2396585803890137,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 20977
+    },
+    {
+      "epoch": 0.20978,
+      "grad_norm": 1.1273775371894441,
+      "learning_rate": 0.003,
+      "loss": 3.9841,
+      "step": 20978
+    },
+    {
+      "epoch": 0.20979,
+      "grad_norm": 1.1579893963134125,
+      "learning_rate": 0.003,
+      "loss": 4.0503,
+      "step": 20979
+    },
+    {
+      "epoch": 0.2098,
+      "grad_norm": 1.4770136889416856,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 20980
+    },
+    {
+      "epoch": 0.20981,
+      "grad_norm": 1.2525591883130254,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 20981
+    },
+    {
+      "epoch": 0.20982,
+      "grad_norm": 1.1521818196021785,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 20982
+    },
+    {
+      "epoch": 0.20983,
+      "grad_norm": 1.46384638181763,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 20983
+    },
+    {
+      "epoch": 0.20984,
+      "grad_norm": 1.3818191030963758,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 20984
+    },
+    {
+      "epoch": 0.20985,
+      "grad_norm": 1.4162840722372632,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 20985
+    },
+    {
+      "epoch": 0.20986,
+      "grad_norm": 1.253322408134988,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 20986
+    },
+    {
+      "epoch": 0.20987,
+      "grad_norm": 1.6227389818820805,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 20987
+    },
+    {
+      "epoch": 0.20988,
+      "grad_norm": 1.146151961671114,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 20988
+    },
+    {
+      "epoch": 0.20989,
+      "grad_norm": 1.3767610749270778,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 20989
+    },
+    {
+      "epoch": 0.2099,
+      "grad_norm": 1.2004378651192353,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 20990
+    },
+    {
+      "epoch": 0.20991,
+      "grad_norm": 1.4464699255172628,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 20991
+    },
+    {
+      "epoch": 0.20992,
+      "grad_norm": 1.1316848430378001,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 20992
+    },
+    {
+      "epoch": 0.20993,
+      "grad_norm": 1.4153563571018821,
+      "learning_rate": 0.003,
+      "loss": 4.0668,
+      "step": 20993
+    },
+    {
+      "epoch": 0.20994,
+      "grad_norm": 1.1866069808551145,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 20994
+    },
+    {
+      "epoch": 0.20995,
+      "grad_norm": 1.4210869979103622,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 20995
+    },
+    {
+      "epoch": 0.20996,
+      "grad_norm": 1.220423579265803,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 20996
+    },
+    {
+      "epoch": 0.20997,
+      "grad_norm": 1.550320873073864,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 20997
+    },
+    {
+      "epoch": 0.20998,
+      "grad_norm": 1.2052910374405408,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 20998
+    },
+    {
+      "epoch": 0.20999,
+      "grad_norm": 1.2626508910416614,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 20999
+    },
+    {
+      "epoch": 0.21,
+      "grad_norm": 1.4657094273895415,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 21000
+    },
+    {
+      "epoch": 0.21001,
+      "grad_norm": 0.8426466033842976,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 21001
+    },
+    {
+      "epoch": 0.21002,
+      "grad_norm": 1.2162380406344855,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 21002
+    },
+    {
+      "epoch": 0.21003,
+      "grad_norm": 1.247299129417583,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 21003
+    },
+    {
+      "epoch": 0.21004,
+      "grad_norm": 1.0497703558196676,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 21004
+    },
+    {
+      "epoch": 0.21005,
+      "grad_norm": 1.4297853219221703,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 21005
+    },
+    {
+      "epoch": 0.21006,
+      "grad_norm": 1.3668254442780212,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 21006
+    },
+    {
+      "epoch": 0.21007,
+      "grad_norm": 1.3091479199392788,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 21007
+    },
+    {
+      "epoch": 0.21008,
+      "grad_norm": 1.1503344038772567,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 21008
+    },
+    {
+      "epoch": 0.21009,
+      "grad_norm": 1.2694370522716472,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 21009
+    },
+    {
+      "epoch": 0.2101,
+      "grad_norm": 1.2791155348899692,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 21010
+    },
+    {
+      "epoch": 0.21011,
+      "grad_norm": 1.2916566176769724,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 21011
+    },
+    {
+      "epoch": 0.21012,
+      "grad_norm": 1.2066521168500823,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 21012
+    },
+    {
+      "epoch": 0.21013,
+      "grad_norm": 1.150050377124669,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 21013
+    },
+    {
+      "epoch": 0.21014,
+      "grad_norm": 1.250403826729695,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 21014
+    },
+    {
+      "epoch": 0.21015,
+      "grad_norm": 1.2682030173775363,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 21015
+    },
+    {
+      "epoch": 0.21016,
+      "grad_norm": 1.346590192420962,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 21016
+    },
+    {
+      "epoch": 0.21017,
+      "grad_norm": 1.2444908446914722,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 21017
+    },
+    {
+      "epoch": 0.21018,
+      "grad_norm": 1.1814548626955845,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 21018
+    },
+    {
+      "epoch": 0.21019,
+      "grad_norm": 1.1802264531515365,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 21019
+    },
+    {
+      "epoch": 0.2102,
+      "grad_norm": 1.4055546693015075,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 21020
+    },
+    {
+      "epoch": 0.21021,
+      "grad_norm": 1.3276315943777508,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 21021
+    },
+    {
+      "epoch": 0.21022,
+      "grad_norm": 1.4067770249435363,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 21022
+    },
+    {
+      "epoch": 0.21023,
+      "grad_norm": 1.1286118229062,
+      "learning_rate": 0.003,
+      "loss": 3.9764,
+      "step": 21023
+    },
+    {
+      "epoch": 0.21024,
+      "grad_norm": 1.5151752380705241,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 21024
+    },
+    {
+      "epoch": 0.21025,
+      "grad_norm": 1.4351006532320592,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 21025
+    },
+    {
+      "epoch": 0.21026,
+      "grad_norm": 1.2755208333175851,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 21026
+    },
+    {
+      "epoch": 0.21027,
+      "grad_norm": 1.5326479194480152,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 21027
+    },
+    {
+      "epoch": 0.21028,
+      "grad_norm": 1.1975456551922903,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 21028
+    },
+    {
+      "epoch": 0.21029,
+      "grad_norm": 1.293909836603582,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 21029
+    },
+    {
+      "epoch": 0.2103,
+      "grad_norm": 1.1873533656071764,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 21030
+    },
+    {
+      "epoch": 0.21031,
+      "grad_norm": 1.3542935137011907,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 21031
+    },
+    {
+      "epoch": 0.21032,
+      "grad_norm": 1.3454802136916275,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 21032
+    },
+    {
+      "epoch": 0.21033,
+      "grad_norm": 1.4319767531483432,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 21033
+    },
+    {
+      "epoch": 0.21034,
+      "grad_norm": 1.5311868287561543,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 21034
+    },
+    {
+      "epoch": 0.21035,
+      "grad_norm": 1.0635668223136652,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 21035
+    },
+    {
+      "epoch": 0.21036,
+      "grad_norm": 1.3992802359851222,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 21036
+    },
+    {
+      "epoch": 0.21037,
+      "grad_norm": 1.101610180781639,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 21037
+    },
+    {
+      "epoch": 0.21038,
+      "grad_norm": 1.6727626016844743,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 21038
+    },
+    {
+      "epoch": 0.21039,
+      "grad_norm": 1.0503441658961554,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 21039
+    },
+    {
+      "epoch": 0.2104,
+      "grad_norm": 1.651521482017359,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 21040
+    },
+    {
+      "epoch": 0.21041,
+      "grad_norm": 1.1222898948926991,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 21041
+    },
+    {
+      "epoch": 0.21042,
+      "grad_norm": 1.1895038194778,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 21042
+    },
+    {
+      "epoch": 0.21043,
+      "grad_norm": 1.4185472706486735,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 21043
+    },
+    {
+      "epoch": 0.21044,
+      "grad_norm": 1.1929862417245685,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 21044
+    },
+    {
+      "epoch": 0.21045,
+      "grad_norm": 1.4127058660166885,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 21045
+    },
+    {
+      "epoch": 0.21046,
+      "grad_norm": 1.1841882626446538,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 21046
+    },
+    {
+      "epoch": 0.21047,
+      "grad_norm": 1.4199594552451236,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 21047
+    },
+    {
+      "epoch": 0.21048,
+      "grad_norm": 1.3935221160042208,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 21048
+    },
+    {
+      "epoch": 0.21049,
+      "grad_norm": 1.3406735873786433,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 21049
+    },
+    {
+      "epoch": 0.2105,
+      "grad_norm": 1.2900406268940852,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 21050
+    },
+    {
+      "epoch": 0.21051,
+      "grad_norm": 1.0942924994969878,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 21051
+    },
+    {
+      "epoch": 0.21052,
+      "grad_norm": 1.5115795749679444,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 21052
+    },
+    {
+      "epoch": 0.21053,
+      "grad_norm": 0.987699006005316,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 21053
+    },
+    {
+      "epoch": 0.21054,
+      "grad_norm": 1.6226452918593997,
+      "learning_rate": 0.003,
+      "loss": 4.0397,
+      "step": 21054
+    },
+    {
+      "epoch": 0.21055,
+      "grad_norm": 1.1075132236580925,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 21055
+    },
+    {
+      "epoch": 0.21056,
+      "grad_norm": 1.2913376821432705,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 21056
+    },
+    {
+      "epoch": 0.21057,
+      "grad_norm": 1.1302175982467895,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 21057
+    },
+    {
+      "epoch": 0.21058,
+      "grad_norm": 1.4493129591507272,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 21058
+    },
+    {
+      "epoch": 0.21059,
+      "grad_norm": 1.0930744676151898,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 21059
+    },
+    {
+      "epoch": 0.2106,
+      "grad_norm": 1.6498175362929546,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 21060
+    },
+    {
+      "epoch": 0.21061,
+      "grad_norm": 1.1566122332822928,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 21061
+    },
+    {
+      "epoch": 0.21062,
+      "grad_norm": 1.1773004778131306,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 21062
+    },
+    {
+      "epoch": 0.21063,
+      "grad_norm": 1.3418062796509926,
+      "learning_rate": 0.003,
+      "loss": 4.0343,
+      "step": 21063
+    },
+    {
+      "epoch": 0.21064,
+      "grad_norm": 1.214846334840867,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 21064
+    },
+    {
+      "epoch": 0.21065,
+      "grad_norm": 1.1592282011731485,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 21065
+    },
+    {
+      "epoch": 0.21066,
+      "grad_norm": 1.1829194312236775,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 21066
+    },
+    {
+      "epoch": 0.21067,
+      "grad_norm": 1.1453211903455005,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 21067
+    },
+    {
+      "epoch": 0.21068,
+      "grad_norm": 1.6792141295044758,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 21068
+    },
+    {
+      "epoch": 0.21069,
+      "grad_norm": 1.248624612245966,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 21069
+    },
+    {
+      "epoch": 0.2107,
+      "grad_norm": 1.3307055884158077,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 21070
+    },
+    {
+      "epoch": 0.21071,
+      "grad_norm": 1.310414259405094,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 21071
+    },
+    {
+      "epoch": 0.21072,
+      "grad_norm": 1.3485595491448568,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 21072
+    },
+    {
+      "epoch": 0.21073,
+      "grad_norm": 1.150016878506225,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 21073
+    },
+    {
+      "epoch": 0.21074,
+      "grad_norm": 1.5636638520179817,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 21074
+    },
+    {
+      "epoch": 0.21075,
+      "grad_norm": 0.9103378989825072,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 21075
+    },
+    {
+      "epoch": 0.21076,
+      "grad_norm": 1.6435064365914083,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 21076
+    },
+    {
+      "epoch": 0.21077,
+      "grad_norm": 1.389869727075436,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 21077
+    },
+    {
+      "epoch": 0.21078,
+      "grad_norm": 1.2968667365055693,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 21078
+    },
+    {
+      "epoch": 0.21079,
+      "grad_norm": 1.2852164509409945,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 21079
+    },
+    {
+      "epoch": 0.2108,
+      "grad_norm": 1.2863709229281344,
+      "learning_rate": 0.003,
+      "loss": 3.9712,
+      "step": 21080
+    },
+    {
+      "epoch": 0.21081,
+      "grad_norm": 1.165286701517474,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 21081
+    },
+    {
+      "epoch": 0.21082,
+      "grad_norm": 1.1831702445738859,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 21082
+    },
+    {
+      "epoch": 0.21083,
+      "grad_norm": 1.3698874275408328,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 21083
+    },
+    {
+      "epoch": 0.21084,
+      "grad_norm": 1.070155197844283,
+      "learning_rate": 0.003,
+      "loss": 3.9705,
+      "step": 21084
+    },
+    {
+      "epoch": 0.21085,
+      "grad_norm": 1.593572081936517,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 21085
+    },
+    {
+      "epoch": 0.21086,
+      "grad_norm": 1.0050184761337113,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 21086
+    },
+    {
+      "epoch": 0.21087,
+      "grad_norm": 1.3248452916758007,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 21087
+    },
+    {
+      "epoch": 0.21088,
+      "grad_norm": 1.2574847715468032,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 21088
+    },
+    {
+      "epoch": 0.21089,
+      "grad_norm": 1.5059041185140845,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 21089
+    },
+    {
+      "epoch": 0.2109,
+      "grad_norm": 1.2814550650514651,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 21090
+    },
+    {
+      "epoch": 0.21091,
+      "grad_norm": 1.265478656418895,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 21091
+    },
+    {
+      "epoch": 0.21092,
+      "grad_norm": 1.4710173927962453,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 21092
+    },
+    {
+      "epoch": 0.21093,
+      "grad_norm": 1.124084105327157,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 21093
+    },
+    {
+      "epoch": 0.21094,
+      "grad_norm": 1.2903391392014474,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 21094
+    },
+    {
+      "epoch": 0.21095,
+      "grad_norm": 1.138267319770349,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 21095
+    },
+    {
+      "epoch": 0.21096,
+      "grad_norm": 1.3545573663070989,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 21096
+    },
+    {
+      "epoch": 0.21097,
+      "grad_norm": 1.1737661476679708,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 21097
+    },
+    {
+      "epoch": 0.21098,
+      "grad_norm": 1.4812522281228715,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 21098
+    },
+    {
+      "epoch": 0.21099,
+      "grad_norm": 1.1802122750236088,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 21099
+    },
+    {
+      "epoch": 0.211,
+      "grad_norm": 1.3917582376759665,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 21100
+    },
+    {
+      "epoch": 0.21101,
+      "grad_norm": 1.204102248892187,
+      "learning_rate": 0.003,
+      "loss": 3.9705,
+      "step": 21101
+    },
+    {
+      "epoch": 0.21102,
+      "grad_norm": 1.4138395551661178,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 21102
+    },
+    {
+      "epoch": 0.21103,
+      "grad_norm": 1.0780545100150152,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 21103
+    },
+    {
+      "epoch": 0.21104,
+      "grad_norm": 1.8828123474872969,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 21104
+    },
+    {
+      "epoch": 0.21105,
+      "grad_norm": 1.074311571175294,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 21105
+    },
+    {
+      "epoch": 0.21106,
+      "grad_norm": 1.3443121477824183,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 21106
+    },
+    {
+      "epoch": 0.21107,
+      "grad_norm": 1.3230201601297726,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 21107
+    },
+    {
+      "epoch": 0.21108,
+      "grad_norm": 1.357944008595562,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 21108
+    },
+    {
+      "epoch": 0.21109,
+      "grad_norm": 1.180808947757267,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 21109
+    },
+    {
+      "epoch": 0.2111,
+      "grad_norm": 1.2478080750731326,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 21110
+    },
+    {
+      "epoch": 0.21111,
+      "grad_norm": 1.3521947162150847,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 21111
+    },
+    {
+      "epoch": 0.21112,
+      "grad_norm": 1.0976268891110357,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 21112
+    },
+    {
+      "epoch": 0.21113,
+      "grad_norm": 1.3818483763584866,
+      "learning_rate": 0.003,
+      "loss": 4.0478,
+      "step": 21113
+    },
+    {
+      "epoch": 0.21114,
+      "grad_norm": 1.2578417994011797,
+      "learning_rate": 0.003,
+      "loss": 4.0376,
+      "step": 21114
+    },
+    {
+      "epoch": 0.21115,
+      "grad_norm": 1.3902811835130122,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 21115
+    },
+    {
+      "epoch": 0.21116,
+      "grad_norm": 1.37366404359566,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 21116
+    },
+    {
+      "epoch": 0.21117,
+      "grad_norm": 1.445033660678071,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 21117
+    },
+    {
+      "epoch": 0.21118,
+      "grad_norm": 1.2422670048266182,
+      "learning_rate": 0.003,
+      "loss": 3.9693,
+      "step": 21118
+    },
+    {
+      "epoch": 0.21119,
+      "grad_norm": 1.3384360813281369,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 21119
+    },
+    {
+      "epoch": 0.2112,
+      "grad_norm": 1.3856165342606725,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 21120
+    },
+    {
+      "epoch": 0.21121,
+      "grad_norm": 1.1978637673350985,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 21121
+    },
+    {
+      "epoch": 0.21122,
+      "grad_norm": 1.1699913770043076,
+      "learning_rate": 0.003,
+      "loss": 3.983,
+      "step": 21122
+    },
+    {
+      "epoch": 0.21123,
+      "grad_norm": 1.41661631111151,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 21123
+    },
+    {
+      "epoch": 0.21124,
+      "grad_norm": 1.1704038954773548,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 21124
+    },
+    {
+      "epoch": 0.21125,
+      "grad_norm": 1.299620782241244,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 21125
+    },
+    {
+      "epoch": 0.21126,
+      "grad_norm": 1.2174487967720073,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 21126
+    },
+    {
+      "epoch": 0.21127,
+      "grad_norm": 1.393982683315909,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 21127
+    },
+    {
+      "epoch": 0.21128,
+      "grad_norm": 1.3240217199864575,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 21128
+    },
+    {
+      "epoch": 0.21129,
+      "grad_norm": 1.2993691202699973,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 21129
+    },
+    {
+      "epoch": 0.2113,
+      "grad_norm": 1.1787335642082368,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 21130
+    },
+    {
+      "epoch": 0.21131,
+      "grad_norm": 1.445809140642193,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 21131
+    },
+    {
+      "epoch": 0.21132,
+      "grad_norm": 1.2324873762168307,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 21132
+    },
+    {
+      "epoch": 0.21133,
+      "grad_norm": 1.268946911603336,
+      "learning_rate": 0.003,
+      "loss": 3.9748,
+      "step": 21133
+    },
+    {
+      "epoch": 0.21134,
+      "grad_norm": 1.2067948759056248,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 21134
+    },
+    {
+      "epoch": 0.21135,
+      "grad_norm": 1.6419534750273073,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 21135
+    },
+    {
+      "epoch": 0.21136,
+      "grad_norm": 1.119240468427281,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 21136
+    },
+    {
+      "epoch": 0.21137,
+      "grad_norm": 1.4929187913273507,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 21137
+    },
+    {
+      "epoch": 0.21138,
+      "grad_norm": 1.111352429473447,
+      "learning_rate": 0.003,
+      "loss": 3.9775,
+      "step": 21138
+    },
+    {
+      "epoch": 0.21139,
+      "grad_norm": 1.3678438972361697,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 21139
+    },
+    {
+      "epoch": 0.2114,
+      "grad_norm": 1.1760781129856996,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 21140
+    },
+    {
+      "epoch": 0.21141,
+      "grad_norm": 1.4054599165807802,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 21141
+    },
+    {
+      "epoch": 0.21142,
+      "grad_norm": 1.3083418704104623,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 21142
+    },
+    {
+      "epoch": 0.21143,
+      "grad_norm": 1.545234464149026,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 21143
+    },
+    {
+      "epoch": 0.21144,
+      "grad_norm": 1.021963450164767,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 21144
+    },
+    {
+      "epoch": 0.21145,
+      "grad_norm": 1.6117880071532047,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 21145
+    },
+    {
+      "epoch": 0.21146,
+      "grad_norm": 1.0852313479915172,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 21146
+    },
+    {
+      "epoch": 0.21147,
+      "grad_norm": 1.6406506749866605,
+      "learning_rate": 0.003,
+      "loss": 4.0485,
+      "step": 21147
+    },
+    {
+      "epoch": 0.21148,
+      "grad_norm": 1.1086528379341336,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 21148
+    },
+    {
+      "epoch": 0.21149,
+      "grad_norm": 1.2074324107997394,
+      "learning_rate": 0.003,
+      "loss": 4.05,
+      "step": 21149
+    },
+    {
+      "epoch": 0.2115,
+      "grad_norm": 1.2565883617470976,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 21150
+    },
+    {
+      "epoch": 0.21151,
+      "grad_norm": 1.3749799932962032,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 21151
+    },
+    {
+      "epoch": 0.21152,
+      "grad_norm": 1.0141556827650298,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 21152
+    },
+    {
+      "epoch": 0.21153,
+      "grad_norm": 1.4091018054828766,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 21153
+    },
+    {
+      "epoch": 0.21154,
+      "grad_norm": 1.3023257100575532,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 21154
+    },
+    {
+      "epoch": 0.21155,
+      "grad_norm": 1.3574208743600995,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 21155
+    },
+    {
+      "epoch": 0.21156,
+      "grad_norm": 1.0617336708276712,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 21156
+    },
+    {
+      "epoch": 0.21157,
+      "grad_norm": 1.5076212705854435,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 21157
+    },
+    {
+      "epoch": 0.21158,
+      "grad_norm": 1.2291871443389546,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 21158
+    },
+    {
+      "epoch": 0.21159,
+      "grad_norm": 1.3655401640930263,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 21159
+    },
+    {
+      "epoch": 0.2116,
+      "grad_norm": 1.0443193430636346,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 21160
+    },
+    {
+      "epoch": 0.21161,
+      "grad_norm": 1.4299310999365014,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 21161
+    },
+    {
+      "epoch": 0.21162,
+      "grad_norm": 1.2692880074849462,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 21162
+    },
+    {
+      "epoch": 0.21163,
+      "grad_norm": 1.4467051000318716,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 21163
+    },
+    {
+      "epoch": 0.21164,
+      "grad_norm": 1.2263805149790108,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 21164
+    },
+    {
+      "epoch": 0.21165,
+      "grad_norm": 1.2532661606448459,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 21165
+    },
+    {
+      "epoch": 0.21166,
+      "grad_norm": 1.2656021765074004,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 21166
+    },
+    {
+      "epoch": 0.21167,
+      "grad_norm": 1.5371437632072553,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 21167
+    },
+    {
+      "epoch": 0.21168,
+      "grad_norm": 1.1799093813679427,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 21168
+    },
+    {
+      "epoch": 0.21169,
+      "grad_norm": 1.325110285511314,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 21169
+    },
+    {
+      "epoch": 0.2117,
+      "grad_norm": 1.22533828774372,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 21170
+    },
+    {
+      "epoch": 0.21171,
+      "grad_norm": 1.3652978701819354,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 21171
+    },
+    {
+      "epoch": 0.21172,
+      "grad_norm": 1.1544571379270874,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 21172
+    },
+    {
+      "epoch": 0.21173,
+      "grad_norm": 1.3994496860010797,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 21173
+    },
+    {
+      "epoch": 0.21174,
+      "grad_norm": 1.123817944555958,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 21174
+    },
+    {
+      "epoch": 0.21175,
+      "grad_norm": 1.4295865239118823,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 21175
+    },
+    {
+      "epoch": 0.21176,
+      "grad_norm": 1.0348636678471874,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 21176
+    },
+    {
+      "epoch": 0.21177,
+      "grad_norm": 1.6072331151182246,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 21177
+    },
+    {
+      "epoch": 0.21178,
+      "grad_norm": 1.1050078156271919,
+      "learning_rate": 0.003,
+      "loss": 3.9797,
+      "step": 21178
+    },
+    {
+      "epoch": 0.21179,
+      "grad_norm": 1.2591936996096824,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 21179
+    },
+    {
+      "epoch": 0.2118,
+      "grad_norm": 1.3893481065272373,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 21180
+    },
+    {
+      "epoch": 0.21181,
+      "grad_norm": 1.231384484431582,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 21181
+    },
+    {
+      "epoch": 0.21182,
+      "grad_norm": 1.2593482817011141,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 21182
+    },
+    {
+      "epoch": 0.21183,
+      "grad_norm": 1.5188009035828427,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 21183
+    },
+    {
+      "epoch": 0.21184,
+      "grad_norm": 0.9300588947910111,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 21184
+    },
+    {
+      "epoch": 0.21185,
+      "grad_norm": 1.4765518696942888,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 21185
+    },
+    {
+      "epoch": 0.21186,
+      "grad_norm": 1.308708043301671,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 21186
+    },
+    {
+      "epoch": 0.21187,
+      "grad_norm": 1.2400948459783754,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 21187
+    },
+    {
+      "epoch": 0.21188,
+      "grad_norm": 1.3130035673237144,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 21188
+    },
+    {
+      "epoch": 0.21189,
+      "grad_norm": 1.3112422848593148,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 21189
+    },
+    {
+      "epoch": 0.2119,
+      "grad_norm": 1.234793747656434,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 21190
+    },
+    {
+      "epoch": 0.21191,
+      "grad_norm": 1.3178223161666065,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 21191
+    },
+    {
+      "epoch": 0.21192,
+      "grad_norm": 1.0896180388630579,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 21192
+    },
+    {
+      "epoch": 0.21193,
+      "grad_norm": 1.3829030955309882,
+      "learning_rate": 0.003,
+      "loss": 3.9827,
+      "step": 21193
+    },
+    {
+      "epoch": 0.21194,
+      "grad_norm": 1.2610078086332306,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 21194
+    },
+    {
+      "epoch": 0.21195,
+      "grad_norm": 1.3994103501887596,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 21195
+    },
+    {
+      "epoch": 0.21196,
+      "grad_norm": 1.1993987801944324,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 21196
+    },
+    {
+      "epoch": 0.21197,
+      "grad_norm": 1.4211816838775149,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 21197
+    },
+    {
+      "epoch": 0.21198,
+      "grad_norm": 1.3207625342701859,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 21198
+    },
+    {
+      "epoch": 0.21199,
+      "grad_norm": 1.0758771183435807,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 21199
+    },
+    {
+      "epoch": 0.212,
+      "grad_norm": 1.6162045608429785,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 21200
+    },
+    {
+      "epoch": 0.21201,
+      "grad_norm": 1.0790395045590022,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 21201
+    },
+    {
+      "epoch": 0.21202,
+      "grad_norm": 1.6472784630646147,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 21202
+    },
+    {
+      "epoch": 0.21203,
+      "grad_norm": 1.1335521484918203,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 21203
+    },
+    {
+      "epoch": 0.21204,
+      "grad_norm": 1.3954711483586664,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 21204
+    },
+    {
+      "epoch": 0.21205,
+      "grad_norm": 1.260586187770137,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 21205
+    },
+    {
+      "epoch": 0.21206,
+      "grad_norm": 1.2769729410825157,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 21206
+    },
+    {
+      "epoch": 0.21207,
+      "grad_norm": 1.256410062460252,
+      "learning_rate": 0.003,
+      "loss": 4.0479,
+      "step": 21207
+    },
+    {
+      "epoch": 0.21208,
+      "grad_norm": 1.132304345314271,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 21208
+    },
+    {
+      "epoch": 0.21209,
+      "grad_norm": 1.3677394624036245,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 21209
+    },
+    {
+      "epoch": 0.2121,
+      "grad_norm": 1.1444877643698947,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 21210
+    },
+    {
+      "epoch": 0.21211,
+      "grad_norm": 1.5283058825969142,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 21211
+    },
+    {
+      "epoch": 0.21212,
+      "grad_norm": 1.1604966173902387,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 21212
+    },
+    {
+      "epoch": 0.21213,
+      "grad_norm": 1.4297043972795795,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 21213
+    },
+    {
+      "epoch": 0.21214,
+      "grad_norm": 1.2774619845532906,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 21214
+    },
+    {
+      "epoch": 0.21215,
+      "grad_norm": 1.363797220566659,
+      "learning_rate": 0.003,
+      "loss": 3.9875,
+      "step": 21215
+    },
+    {
+      "epoch": 0.21216,
+      "grad_norm": 1.1316247770105718,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 21216
+    },
+    {
+      "epoch": 0.21217,
+      "grad_norm": 1.3718156467132354,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 21217
+    },
+    {
+      "epoch": 0.21218,
+      "grad_norm": 1.0877161459964804,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 21218
+    },
+    {
+      "epoch": 0.21219,
+      "grad_norm": 1.3839190340504315,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 21219
+    },
+    {
+      "epoch": 0.2122,
+      "grad_norm": 1.2799548858568972,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 21220
+    },
+    {
+      "epoch": 0.21221,
+      "grad_norm": 1.4556264698251036,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 21221
+    },
+    {
+      "epoch": 0.21222,
+      "grad_norm": 1.3097882774362106,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 21222
+    },
+    {
+      "epoch": 0.21223,
+      "grad_norm": 1.2055914198945927,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 21223
+    },
+    {
+      "epoch": 0.21224,
+      "grad_norm": 1.223304682917192,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 21224
+    },
+    {
+      "epoch": 0.21225,
+      "grad_norm": 1.1841725520333635,
+      "learning_rate": 0.003,
+      "loss": 3.9763,
+      "step": 21225
+    },
+    {
+      "epoch": 0.21226,
+      "grad_norm": 1.1767385630970966,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 21226
+    },
+    {
+      "epoch": 0.21227,
+      "grad_norm": 1.4446032690434942,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 21227
+    },
+    {
+      "epoch": 0.21228,
+      "grad_norm": 1.1971968443793868,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 21228
+    },
+    {
+      "epoch": 0.21229,
+      "grad_norm": 1.3803089736356173,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 21229
+    },
+    {
+      "epoch": 0.2123,
+      "grad_norm": 1.3745377610142673,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 21230
+    },
+    {
+      "epoch": 0.21231,
+      "grad_norm": 1.336353792415373,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 21231
+    },
+    {
+      "epoch": 0.21232,
+      "grad_norm": 1.3480835217619898,
+      "learning_rate": 0.003,
+      "loss": 3.9762,
+      "step": 21232
+    },
+    {
+      "epoch": 0.21233,
+      "grad_norm": 1.29922723438427,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 21233
+    },
+    {
+      "epoch": 0.21234,
+      "grad_norm": 1.3082049009246703,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 21234
+    },
+    {
+      "epoch": 0.21235,
+      "grad_norm": 1.4359919055611325,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 21235
+    },
+    {
+      "epoch": 0.21236,
+      "grad_norm": 1.3516132572632047,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 21236
+    },
+    {
+      "epoch": 0.21237,
+      "grad_norm": 1.1728754873626648,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 21237
+    },
+    {
+      "epoch": 0.21238,
+      "grad_norm": 1.3178500080278042,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 21238
+    },
+    {
+      "epoch": 0.21239,
+      "grad_norm": 1.2032317332130826,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 21239
+    },
+    {
+      "epoch": 0.2124,
+      "grad_norm": 1.4083790793202176,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 21240
+    },
+    {
+      "epoch": 0.21241,
+      "grad_norm": 1.0902518036476174,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 21241
+    },
+    {
+      "epoch": 0.21242,
+      "grad_norm": 1.3964181006615006,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 21242
+    },
+    {
+      "epoch": 0.21243,
+      "grad_norm": 1.1141556346665313,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 21243
+    },
+    {
+      "epoch": 0.21244,
+      "grad_norm": 1.360287346610363,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 21244
+    },
+    {
+      "epoch": 0.21245,
+      "grad_norm": 1.1476238440548074,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 21245
+    },
+    {
+      "epoch": 0.21246,
+      "grad_norm": 1.4498650374787685,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 21246
+    },
+    {
+      "epoch": 0.21247,
+      "grad_norm": 1.219634209806882,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 21247
+    },
+    {
+      "epoch": 0.21248,
+      "grad_norm": 1.3677626752722343,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 21248
+    },
+    {
+      "epoch": 0.21249,
+      "grad_norm": 1.1730841227727533,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 21249
+    },
+    {
+      "epoch": 0.2125,
+      "grad_norm": 1.283121563545289,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 21250
+    },
+    {
+      "epoch": 0.21251,
+      "grad_norm": 1.5899711206972593,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 21251
+    },
+    {
+      "epoch": 0.21252,
+      "grad_norm": 1.1279668522018718,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 21252
+    },
+    {
+      "epoch": 0.21253,
+      "grad_norm": 1.4016566409211078,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 21253
+    },
+    {
+      "epoch": 0.21254,
+      "grad_norm": 1.1211305110660514,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 21254
+    },
+    {
+      "epoch": 0.21255,
+      "grad_norm": 1.2678127873644853,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 21255
+    },
+    {
+      "epoch": 0.21256,
+      "grad_norm": 1.159226715669131,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 21256
+    },
+    {
+      "epoch": 0.21257,
+      "grad_norm": 1.401600018057241,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 21257
+    },
+    {
+      "epoch": 0.21258,
+      "grad_norm": 1.1026264776335892,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 21258
+    },
+    {
+      "epoch": 0.21259,
+      "grad_norm": 1.3058149541218889,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 21259
+    },
+    {
+      "epoch": 0.2126,
+      "grad_norm": 1.1550427999452855,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 21260
+    },
+    {
+      "epoch": 0.21261,
+      "grad_norm": 1.4578555975512333,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 21261
+    },
+    {
+      "epoch": 0.21262,
+      "grad_norm": 1.2485963272479557,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 21262
+    },
+    {
+      "epoch": 0.21263,
+      "grad_norm": 1.4034550145512787,
+      "learning_rate": 0.003,
+      "loss": 3.9642,
+      "step": 21263
+    },
+    {
+      "epoch": 0.21264,
+      "grad_norm": 1.2735800476724184,
+      "learning_rate": 0.003,
+      "loss": 3.975,
+      "step": 21264
+    },
+    {
+      "epoch": 0.21265,
+      "grad_norm": 1.2827274536484716,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 21265
+    },
+    {
+      "epoch": 0.21266,
+      "grad_norm": 1.354228376291486,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 21266
+    },
+    {
+      "epoch": 0.21267,
+      "grad_norm": 1.1627210296817798,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 21267
+    },
+    {
+      "epoch": 0.21268,
+      "grad_norm": 1.267854984356927,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 21268
+    },
+    {
+      "epoch": 0.21269,
+      "grad_norm": 1.2604412859041527,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 21269
+    },
+    {
+      "epoch": 0.2127,
+      "grad_norm": 1.2660641521622518,
+      "learning_rate": 0.003,
+      "loss": 3.9778,
+      "step": 21270
+    },
+    {
+      "epoch": 0.21271,
+      "grad_norm": 1.4597987476940328,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 21271
+    },
+    {
+      "epoch": 0.21272,
+      "grad_norm": 1.2362391320302166,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 21272
+    },
+    {
+      "epoch": 0.21273,
+      "grad_norm": 1.2000551547748395,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 21273
+    },
+    {
+      "epoch": 0.21274,
+      "grad_norm": 1.4783597163078859,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 21274
+    },
+    {
+      "epoch": 0.21275,
+      "grad_norm": 1.2112814570056818,
+      "learning_rate": 0.003,
+      "loss": 3.9656,
+      "step": 21275
+    },
+    {
+      "epoch": 0.21276,
+      "grad_norm": 1.6225107617285,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 21276
+    },
+    {
+      "epoch": 0.21277,
+      "grad_norm": 1.0994356146259867,
+      "learning_rate": 0.003,
+      "loss": 3.976,
+      "step": 21277
+    },
+    {
+      "epoch": 0.21278,
+      "grad_norm": 1.3355542311221413,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 21278
+    },
+    {
+      "epoch": 0.21279,
+      "grad_norm": 1.4528812021900295,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 21279
+    },
+    {
+      "epoch": 0.2128,
+      "grad_norm": 1.1270010183870507,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 21280
+    },
+    {
+      "epoch": 0.21281,
+      "grad_norm": 1.6222837511228707,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 21281
+    },
+    {
+      "epoch": 0.21282,
+      "grad_norm": 1.3560559171024427,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 21282
+    },
+    {
+      "epoch": 0.21283,
+      "grad_norm": 1.206377337998466,
+      "learning_rate": 0.003,
+      "loss": 3.9823,
+      "step": 21283
+    },
+    {
+      "epoch": 0.21284,
+      "grad_norm": 1.4478849666553033,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 21284
+    },
+    {
+      "epoch": 0.21285,
+      "grad_norm": 1.031682289816814,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 21285
+    },
+    {
+      "epoch": 0.21286,
+      "grad_norm": 1.5492431397673714,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 21286
+    },
+    {
+      "epoch": 0.21287,
+      "grad_norm": 0.9294348924620607,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 21287
+    },
+    {
+      "epoch": 0.21288,
+      "grad_norm": 1.3343708198068434,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 21288
+    },
+    {
+      "epoch": 0.21289,
+      "grad_norm": 1.2126015330442157,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 21289
+    },
+    {
+      "epoch": 0.2129,
+      "grad_norm": 1.310241506031151,
+      "learning_rate": 0.003,
+      "loss": 3.983,
+      "step": 21290
+    },
+    {
+      "epoch": 0.21291,
+      "grad_norm": 1.272249554497266,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 21291
+    },
+    {
+      "epoch": 0.21292,
+      "grad_norm": 1.226696730447073,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 21292
+    },
+    {
+      "epoch": 0.21293,
+      "grad_norm": 1.2684456382263114,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 21293
+    },
+    {
+      "epoch": 0.21294,
+      "grad_norm": 1.1753006797691539,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 21294
+    },
+    {
+      "epoch": 0.21295,
+      "grad_norm": 1.3836189603806852,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 21295
+    },
+    {
+      "epoch": 0.21296,
+      "grad_norm": 1.2423560997932626,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 21296
+    },
+    {
+      "epoch": 0.21297,
+      "grad_norm": 1.415963176126685,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 21297
+    },
+    {
+      "epoch": 0.21298,
+      "grad_norm": 1.5576290526647152,
+      "learning_rate": 0.003,
+      "loss": 4.0523,
+      "step": 21298
+    },
+    {
+      "epoch": 0.21299,
+      "grad_norm": 1.092855683681634,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 21299
+    },
+    {
+      "epoch": 0.213,
+      "grad_norm": 1.4105239129756129,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 21300
+    },
+    {
+      "epoch": 0.21301,
+      "grad_norm": 1.1931889775606974,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 21301
+    },
+    {
+      "epoch": 0.21302,
+      "grad_norm": 1.3281142647686974,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 21302
+    },
+    {
+      "epoch": 0.21303,
+      "grad_norm": 1.3356632188378508,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 21303
+    },
+    {
+      "epoch": 0.21304,
+      "grad_norm": 1.299170991171862,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 21304
+    },
+    {
+      "epoch": 0.21305,
+      "grad_norm": 1.2414543840786276,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 21305
+    },
+    {
+      "epoch": 0.21306,
+      "grad_norm": 1.5792973486716455,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 21306
+    },
+    {
+      "epoch": 0.21307,
+      "grad_norm": 1.0003613200836647,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 21307
+    },
+    {
+      "epoch": 0.21308,
+      "grad_norm": 1.5683257258969845,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 21308
+    },
+    {
+      "epoch": 0.21309,
+      "grad_norm": 1.1250512755794553,
+      "learning_rate": 0.003,
+      "loss": 3.9688,
+      "step": 21309
+    },
+    {
+      "epoch": 0.2131,
+      "grad_norm": 1.2735480228773357,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 21310
+    },
+    {
+      "epoch": 0.21311,
+      "grad_norm": 1.1773729391010723,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 21311
+    },
+    {
+      "epoch": 0.21312,
+      "grad_norm": 1.2610886885253147,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 21312
+    },
+    {
+      "epoch": 0.21313,
+      "grad_norm": 1.1574334866357563,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 21313
+    },
+    {
+      "epoch": 0.21314,
+      "grad_norm": 1.2808612310624075,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 21314
+    },
+    {
+      "epoch": 0.21315,
+      "grad_norm": 1.349130588459404,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 21315
+    },
+    {
+      "epoch": 0.21316,
+      "grad_norm": 1.1423955234689311,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 21316
+    },
+    {
+      "epoch": 0.21317,
+      "grad_norm": 1.8164151986280979,
+      "learning_rate": 0.003,
+      "loss": 4.0417,
+      "step": 21317
+    },
+    {
+      "epoch": 0.21318,
+      "grad_norm": 1.1210772084413818,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 21318
+    },
+    {
+      "epoch": 0.21319,
+      "grad_norm": 1.6191899228858475,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 21319
+    },
+    {
+      "epoch": 0.2132,
+      "grad_norm": 1.1237918111085823,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 21320
+    },
+    {
+      "epoch": 0.21321,
+      "grad_norm": 1.2988708232693222,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 21321
+    },
+    {
+      "epoch": 0.21322,
+      "grad_norm": 1.2005726508870185,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 21322
+    },
+    {
+      "epoch": 0.21323,
+      "grad_norm": 1.1443523272905045,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 21323
+    },
+    {
+      "epoch": 0.21324,
+      "grad_norm": 1.3226837192294245,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 21324
+    },
+    {
+      "epoch": 0.21325,
+      "grad_norm": 1.1074293784611595,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 21325
+    },
+    {
+      "epoch": 0.21326,
+      "grad_norm": 1.5446720373953398,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 21326
+    },
+    {
+      "epoch": 0.21327,
+      "grad_norm": 1.2244182787707605,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 21327
+    },
+    {
+      "epoch": 0.21328,
+      "grad_norm": 1.470114012074915,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 21328
+    },
+    {
+      "epoch": 0.21329,
+      "grad_norm": 1.211871809487852,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 21329
+    },
+    {
+      "epoch": 0.2133,
+      "grad_norm": 1.4899181351460404,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 21330
+    },
+    {
+      "epoch": 0.21331,
+      "grad_norm": 1.1453057803915716,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 21331
+    },
+    {
+      "epoch": 0.21332,
+      "grad_norm": 1.1285885084390557,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 21332
+    },
+    {
+      "epoch": 0.21333,
+      "grad_norm": 1.4747605208858212,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 21333
+    },
+    {
+      "epoch": 0.21334,
+      "grad_norm": 1.5361029100826216,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 21334
+    },
+    {
+      "epoch": 0.21335,
+      "grad_norm": 1.0461949717899224,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 21335
+    },
+    {
+      "epoch": 0.21336,
+      "grad_norm": 1.4826082082744698,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 21336
+    },
+    {
+      "epoch": 0.21337,
+      "grad_norm": 1.256608497172299,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 21337
+    },
+    {
+      "epoch": 0.21338,
+      "grad_norm": 1.26022149364038,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 21338
+    },
+    {
+      "epoch": 0.21339,
+      "grad_norm": 1.5966159579768393,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 21339
+    },
+    {
+      "epoch": 0.2134,
+      "grad_norm": 1.2374109671712106,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 21340
+    },
+    {
+      "epoch": 0.21341,
+      "grad_norm": 1.3149471938773223,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 21341
+    },
+    {
+      "epoch": 0.21342,
+      "grad_norm": 1.3164788239225016,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 21342
+    },
+    {
+      "epoch": 0.21343,
+      "grad_norm": 1.1776331709735899,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 21343
+    },
+    {
+      "epoch": 0.21344,
+      "grad_norm": 1.3151643264593478,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 21344
+    },
+    {
+      "epoch": 0.21345,
+      "grad_norm": 1.2954014699369114,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 21345
+    },
+    {
+      "epoch": 0.21346,
+      "grad_norm": 1.1601775387560542,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 21346
+    },
+    {
+      "epoch": 0.21347,
+      "grad_norm": 1.4393013756118482,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 21347
+    },
+    {
+      "epoch": 0.21348,
+      "grad_norm": 1.190822005653938,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 21348
+    },
+    {
+      "epoch": 0.21349,
+      "grad_norm": 1.4618647769557138,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 21349
+    },
+    {
+      "epoch": 0.2135,
+      "grad_norm": 1.293711295223463,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 21350
+    },
+    {
+      "epoch": 0.21351,
+      "grad_norm": 1.25873860579439,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 21351
+    },
+    {
+      "epoch": 0.21352,
+      "grad_norm": 1.2304525206180121,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 21352
+    },
+    {
+      "epoch": 0.21353,
+      "grad_norm": 1.1840839682881577,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 21353
+    },
+    {
+      "epoch": 0.21354,
+      "grad_norm": 1.3984978933015169,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 21354
+    },
+    {
+      "epoch": 0.21355,
+      "grad_norm": 1.2661975355284674,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 21355
+    },
+    {
+      "epoch": 0.21356,
+      "grad_norm": 1.347473468869683,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 21356
+    },
+    {
+      "epoch": 0.21357,
+      "grad_norm": 1.0885962108686378,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 21357
+    },
+    {
+      "epoch": 0.21358,
+      "grad_norm": 1.1314897911001327,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 21358
+    },
+    {
+      "epoch": 0.21359,
+      "grad_norm": 1.3005439871359972,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 21359
+    },
+    {
+      "epoch": 0.2136,
+      "grad_norm": 1.5073575783852275,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 21360
+    },
+    {
+      "epoch": 0.21361,
+      "grad_norm": 1.0652492379261547,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 21361
+    },
+    {
+      "epoch": 0.21362,
+      "grad_norm": 1.216758340178142,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 21362
+    },
+    {
+      "epoch": 0.21363,
+      "grad_norm": 1.1995866387632559,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 21363
+    },
+    {
+      "epoch": 0.21364,
+      "grad_norm": 1.4131807551457323,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 21364
+    },
+    {
+      "epoch": 0.21365,
+      "grad_norm": 1.5043171830474817,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 21365
+    },
+    {
+      "epoch": 0.21366,
+      "grad_norm": 1.0624516728743951,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 21366
+    },
+    {
+      "epoch": 0.21367,
+      "grad_norm": 1.3607057371756948,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 21367
+    },
+    {
+      "epoch": 0.21368,
+      "grad_norm": 1.3096954634897635,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 21368
+    },
+    {
+      "epoch": 0.21369,
+      "grad_norm": 1.2522586992594944,
+      "learning_rate": 0.003,
+      "loss": 3.9812,
+      "step": 21369
+    },
+    {
+      "epoch": 0.2137,
+      "grad_norm": 1.403120464272327,
+      "learning_rate": 0.003,
+      "loss": 4.0458,
+      "step": 21370
+    },
+    {
+      "epoch": 0.21371,
+      "grad_norm": 1.35181527667303,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 21371
+    },
+    {
+      "epoch": 0.21372,
+      "grad_norm": 1.3008981433425613,
+      "learning_rate": 0.003,
+      "loss": 4.037,
+      "step": 21372
+    },
+    {
+      "epoch": 0.21373,
+      "grad_norm": 1.2515627769029236,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 21373
+    },
+    {
+      "epoch": 0.21374,
+      "grad_norm": 1.2148352999355096,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 21374
+    },
+    {
+      "epoch": 0.21375,
+      "grad_norm": 1.4139390953454516,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 21375
+    },
+    {
+      "epoch": 0.21376,
+      "grad_norm": 1.0759238654505578,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 21376
+    },
+    {
+      "epoch": 0.21377,
+      "grad_norm": 1.6849312862891452,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 21377
+    },
+    {
+      "epoch": 0.21378,
+      "grad_norm": 1.1059074948266352,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 21378
+    },
+    {
+      "epoch": 0.21379,
+      "grad_norm": 1.3182515548313258,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 21379
+    },
+    {
+      "epoch": 0.2138,
+      "grad_norm": 1.2574813585308635,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 21380
+    },
+    {
+      "epoch": 0.21381,
+      "grad_norm": 1.2399070771067897,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 21381
+    },
+    {
+      "epoch": 0.21382,
+      "grad_norm": 1.3641032279001806,
+      "learning_rate": 0.003,
+      "loss": 3.9784,
+      "step": 21382
+    },
+    {
+      "epoch": 0.21383,
+      "grad_norm": 1.1099829198459543,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 21383
+    },
+    {
+      "epoch": 0.21384,
+      "grad_norm": 1.4251897190054135,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 21384
+    },
+    {
+      "epoch": 0.21385,
+      "grad_norm": 1.158582942078021,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 21385
+    },
+    {
+      "epoch": 0.21386,
+      "grad_norm": 1.3353797687285938,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 21386
+    },
+    {
+      "epoch": 0.21387,
+      "grad_norm": 1.2214721877347425,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 21387
+    },
+    {
+      "epoch": 0.21388,
+      "grad_norm": 1.4260190324506035,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 21388
+    },
+    {
+      "epoch": 0.21389,
+      "grad_norm": 1.2079203894619288,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 21389
+    },
+    {
+      "epoch": 0.2139,
+      "grad_norm": 1.3252664547362911,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 21390
+    },
+    {
+      "epoch": 0.21391,
+      "grad_norm": 1.1213318980684008,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 21391
+    },
+    {
+      "epoch": 0.21392,
+      "grad_norm": 1.443050277228066,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 21392
+    },
+    {
+      "epoch": 0.21393,
+      "grad_norm": 1.0634393333504684,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 21393
+    },
+    {
+      "epoch": 0.21394,
+      "grad_norm": 1.2988684282009924,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 21394
+    },
+    {
+      "epoch": 0.21395,
+      "grad_norm": 1.2909380005113325,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 21395
+    },
+    {
+      "epoch": 0.21396,
+      "grad_norm": 1.217411864513033,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 21396
+    },
+    {
+      "epoch": 0.21397,
+      "grad_norm": 1.2409544569046358,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 21397
+    },
+    {
+      "epoch": 0.21398,
+      "grad_norm": 1.3481818038287408,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 21398
+    },
+    {
+      "epoch": 0.21399,
+      "grad_norm": 1.2349195631598209,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 21399
+    },
+    {
+      "epoch": 0.214,
+      "grad_norm": 1.453933386247848,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 21400
+    },
+    {
+      "epoch": 0.21401,
+      "grad_norm": 1.2639852374422023,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 21401
+    },
+    {
+      "epoch": 0.21402,
+      "grad_norm": 1.2683657297189679,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 21402
+    },
+    {
+      "epoch": 0.21403,
+      "grad_norm": 1.5789091020620567,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 21403
+    },
+    {
+      "epoch": 0.21404,
+      "grad_norm": 1.1410429952755408,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 21404
+    },
+    {
+      "epoch": 0.21405,
+      "grad_norm": 1.3913877893115831,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 21405
+    },
+    {
+      "epoch": 0.21406,
+      "grad_norm": 1.442491921078408,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 21406
+    },
+    {
+      "epoch": 0.21407,
+      "grad_norm": 1.1217783508484191,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 21407
+    },
+    {
+      "epoch": 0.21408,
+      "grad_norm": 1.5234491519218374,
+      "learning_rate": 0.003,
+      "loss": 3.9798,
+      "step": 21408
+    },
+    {
+      "epoch": 0.21409,
+      "grad_norm": 1.0170027424104313,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 21409
+    },
+    {
+      "epoch": 0.2141,
+      "grad_norm": 1.5807878224787788,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 21410
+    },
+    {
+      "epoch": 0.21411,
+      "grad_norm": 1.1118604708356918,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 21411
+    },
+    {
+      "epoch": 0.21412,
+      "grad_norm": 1.377643628162498,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 21412
+    },
+    {
+      "epoch": 0.21413,
+      "grad_norm": 1.2533109098303772,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 21413
+    },
+    {
+      "epoch": 0.21414,
+      "grad_norm": 1.395891997233696,
+      "learning_rate": 0.003,
+      "loss": 3.9749,
+      "step": 21414
+    },
+    {
+      "epoch": 0.21415,
+      "grad_norm": 1.2769178610219172,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 21415
+    },
+    {
+      "epoch": 0.21416,
+      "grad_norm": 1.404707776522886,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 21416
+    },
+    {
+      "epoch": 0.21417,
+      "grad_norm": 1.2219210917427004,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 21417
+    },
+    {
+      "epoch": 0.21418,
+      "grad_norm": 1.3301764256413287,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 21418
+    },
+    {
+      "epoch": 0.21419,
+      "grad_norm": 1.255608479546366,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 21419
+    },
+    {
+      "epoch": 0.2142,
+      "grad_norm": 1.0150683005258947,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 21420
+    },
+    {
+      "epoch": 0.21421,
+      "grad_norm": 1.4938975794565952,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 21421
+    },
+    {
+      "epoch": 0.21422,
+      "grad_norm": 1.0870969814531026,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 21422
+    },
+    {
+      "epoch": 0.21423,
+      "grad_norm": 1.6389893248907068,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 21423
+    },
+    {
+      "epoch": 0.21424,
+      "grad_norm": 1.0803050974565995,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 21424
+    },
+    {
+      "epoch": 0.21425,
+      "grad_norm": 1.3629649486136104,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 21425
+    },
+    {
+      "epoch": 0.21426,
+      "grad_norm": 1.27790761440876,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 21426
+    },
+    {
+      "epoch": 0.21427,
+      "grad_norm": 1.3471173057991788,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 21427
+    },
+    {
+      "epoch": 0.21428,
+      "grad_norm": 1.1650916012836452,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 21428
+    },
+    {
+      "epoch": 0.21429,
+      "grad_norm": 1.3525680889018905,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 21429
+    },
+    {
+      "epoch": 0.2143,
+      "grad_norm": 1.3578685013555272,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 21430
+    },
+    {
+      "epoch": 0.21431,
+      "grad_norm": 1.5313463457738652,
+      "learning_rate": 0.003,
+      "loss": 4.0389,
+      "step": 21431
+    },
+    {
+      "epoch": 0.21432,
+      "grad_norm": 1.1221053706443744,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 21432
+    },
+    {
+      "epoch": 0.21433,
+      "grad_norm": 1.262878111973225,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 21433
+    },
+    {
+      "epoch": 0.21434,
+      "grad_norm": 1.112496416079058,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 21434
+    },
+    {
+      "epoch": 0.21435,
+      "grad_norm": 1.3488067706818132,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 21435
+    },
+    {
+      "epoch": 0.21436,
+      "grad_norm": 1.2722101358484585,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 21436
+    },
+    {
+      "epoch": 0.21437,
+      "grad_norm": 1.3003697516118862,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 21437
+    },
+    {
+      "epoch": 0.21438,
+      "grad_norm": 1.3446776104487859,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 21438
+    },
+    {
+      "epoch": 0.21439,
+      "grad_norm": 1.13573298412396,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 21439
+    },
+    {
+      "epoch": 0.2144,
+      "grad_norm": 1.2671007531062772,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 21440
+    },
+    {
+      "epoch": 0.21441,
+      "grad_norm": 1.1054463986022816,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 21441
+    },
+    {
+      "epoch": 0.21442,
+      "grad_norm": 1.543097375970362,
+      "learning_rate": 0.003,
+      "loss": 4.0348,
+      "step": 21442
+    },
+    {
+      "epoch": 0.21443,
+      "grad_norm": 1.07568513756536,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 21443
+    },
+    {
+      "epoch": 0.21444,
+      "grad_norm": 1.5448034457264181,
+      "learning_rate": 0.003,
+      "loss": 4.0532,
+      "step": 21444
+    },
+    {
+      "epoch": 0.21445,
+      "grad_norm": 1.118102941141119,
+      "learning_rate": 0.003,
+      "loss": 4.035,
+      "step": 21445
+    },
+    {
+      "epoch": 0.21446,
+      "grad_norm": 1.3245607697334398,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 21446
+    },
+    {
+      "epoch": 0.21447,
+      "grad_norm": 1.1060064073278655,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 21447
+    },
+    {
+      "epoch": 0.21448,
+      "grad_norm": 1.3605482738300085,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 21448
+    },
+    {
+      "epoch": 0.21449,
+      "grad_norm": 1.2014023092926354,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 21449
+    },
+    {
+      "epoch": 0.2145,
+      "grad_norm": 1.5949864538879863,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 21450
+    },
+    {
+      "epoch": 0.21451,
+      "grad_norm": 1.0462933231610054,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 21451
+    },
+    {
+      "epoch": 0.21452,
+      "grad_norm": 1.0730870629231792,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 21452
+    },
+    {
+      "epoch": 0.21453,
+      "grad_norm": 1.3531634152425605,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 21453
+    },
+    {
+      "epoch": 0.21454,
+      "grad_norm": 1.2492241430343944,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 21454
+    },
+    {
+      "epoch": 0.21455,
+      "grad_norm": 1.2562263465305519,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 21455
+    },
+    {
+      "epoch": 0.21456,
+      "grad_norm": 1.3416900438392367,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 21456
+    },
+    {
+      "epoch": 0.21457,
+      "grad_norm": 1.2882097788811127,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 21457
+    },
+    {
+      "epoch": 0.21458,
+      "grad_norm": 1.6754492054948766,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 21458
+    },
+    {
+      "epoch": 0.21459,
+      "grad_norm": 1.2750228404207182,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 21459
+    },
+    {
+      "epoch": 0.2146,
+      "grad_norm": 1.2312690083780862,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 21460
+    },
+    {
+      "epoch": 0.21461,
+      "grad_norm": 1.415119412197984,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 21461
+    },
+    {
+      "epoch": 0.21462,
+      "grad_norm": 1.042354813227625,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 21462
+    },
+    {
+      "epoch": 0.21463,
+      "grad_norm": 1.4205642690972138,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 21463
+    },
+    {
+      "epoch": 0.21464,
+      "grad_norm": 1.2038177455095624,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 21464
+    },
+    {
+      "epoch": 0.21465,
+      "grad_norm": 1.5209852052323058,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 21465
+    },
+    {
+      "epoch": 0.21466,
+      "grad_norm": 1.1719406615878747,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 21466
+    },
+    {
+      "epoch": 0.21467,
+      "grad_norm": 1.2860622089796727,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 21467
+    },
+    {
+      "epoch": 0.21468,
+      "grad_norm": 1.2243853816287118,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 21468
+    },
+    {
+      "epoch": 0.21469,
+      "grad_norm": 1.1212517738190864,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 21469
+    },
+    {
+      "epoch": 0.2147,
+      "grad_norm": 1.3962602151682324,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 21470
+    },
+    {
+      "epoch": 0.21471,
+      "grad_norm": 1.1418886482327282,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 21471
+    },
+    {
+      "epoch": 0.21472,
+      "grad_norm": 1.2181218711146389,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 21472
+    },
+    {
+      "epoch": 0.21473,
+      "grad_norm": 1.1252515483849532,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 21473
+    },
+    {
+      "epoch": 0.21474,
+      "grad_norm": 1.458273668384171,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 21474
+    },
+    {
+      "epoch": 0.21475,
+      "grad_norm": 1.2525130639963844,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 21475
+    },
+    {
+      "epoch": 0.21476,
+      "grad_norm": 1.2556582836326315,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 21476
+    },
+    {
+      "epoch": 0.21477,
+      "grad_norm": 1.1993392452488705,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 21477
+    },
+    {
+      "epoch": 0.21478,
+      "grad_norm": 1.3649808422080891,
+      "learning_rate": 0.003,
+      "loss": 3.9771,
+      "step": 21478
+    },
+    {
+      "epoch": 0.21479,
+      "grad_norm": 1.4171615759163327,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 21479
+    },
+    {
+      "epoch": 0.2148,
+      "grad_norm": 1.1018595505645379,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 21480
+    },
+    {
+      "epoch": 0.21481,
+      "grad_norm": 1.5358015328014705,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 21481
+    },
+    {
+      "epoch": 0.21482,
+      "grad_norm": 1.1275084769898691,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 21482
+    },
+    {
+      "epoch": 0.21483,
+      "grad_norm": 1.4868032279174914,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 21483
+    },
+    {
+      "epoch": 0.21484,
+      "grad_norm": 1.212412362015427,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 21484
+    },
+    {
+      "epoch": 0.21485,
+      "grad_norm": 1.3202183971504091,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 21485
+    },
+    {
+      "epoch": 0.21486,
+      "grad_norm": 1.3196587805978937,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 21486
+    },
+    {
+      "epoch": 0.21487,
+      "grad_norm": 1.248637036083901,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 21487
+    },
+    {
+      "epoch": 0.21488,
+      "grad_norm": 1.3820373428235133,
+      "learning_rate": 0.003,
+      "loss": 4.0481,
+      "step": 21488
+    },
+    {
+      "epoch": 0.21489,
+      "grad_norm": 1.307355208344091,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 21489
+    },
+    {
+      "epoch": 0.2149,
+      "grad_norm": 1.3202726915499903,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 21490
+    },
+    {
+      "epoch": 0.21491,
+      "grad_norm": 1.287520544838405,
+      "learning_rate": 0.003,
+      "loss": 4.0477,
+      "step": 21491
+    },
+    {
+      "epoch": 0.21492,
+      "grad_norm": 1.49770344778566,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 21492
+    },
+    {
+      "epoch": 0.21493,
+      "grad_norm": 1.077593416463218,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 21493
+    },
+    {
+      "epoch": 0.21494,
+      "grad_norm": 1.4410608889447494,
+      "learning_rate": 0.003,
+      "loss": 4.0493,
+      "step": 21494
+    },
+    {
+      "epoch": 0.21495,
+      "grad_norm": 1.192103509332826,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 21495
+    },
+    {
+      "epoch": 0.21496,
+      "grad_norm": 1.21422316517921,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 21496
+    },
+    {
+      "epoch": 0.21497,
+      "grad_norm": 1.4007812486101756,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 21497
+    },
+    {
+      "epoch": 0.21498,
+      "grad_norm": 1.2377016010563344,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 21498
+    },
+    {
+      "epoch": 0.21499,
+      "grad_norm": 1.2812928825175924,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 21499
+    },
+    {
+      "epoch": 0.215,
+      "grad_norm": 1.3011255692312629,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 21500
+    },
+    {
+      "epoch": 0.21501,
+      "grad_norm": 1.3848494007403251,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 21501
+    },
+    {
+      "epoch": 0.21502,
+      "grad_norm": 1.1629301338044207,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 21502
+    },
+    {
+      "epoch": 0.21503,
+      "grad_norm": 1.1111612135670403,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 21503
+    },
+    {
+      "epoch": 0.21504,
+      "grad_norm": 1.3145510880194946,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 21504
+    },
+    {
+      "epoch": 0.21505,
+      "grad_norm": 1.2718876175438374,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 21505
+    },
+    {
+      "epoch": 0.21506,
+      "grad_norm": 1.3691633475516427,
+      "learning_rate": 0.003,
+      "loss": 3.9751,
+      "step": 21506
+    },
+    {
+      "epoch": 0.21507,
+      "grad_norm": 1.300701360877103,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 21507
+    },
+    {
+      "epoch": 0.21508,
+      "grad_norm": 1.2663418016031303,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 21508
+    },
+    {
+      "epoch": 0.21509,
+      "grad_norm": 1.3091183945038203,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 21509
+    },
+    {
+      "epoch": 0.2151,
+      "grad_norm": 1.238548275613207,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 21510
+    },
+    {
+      "epoch": 0.21511,
+      "grad_norm": 1.3447995961323977,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 21511
+    },
+    {
+      "epoch": 0.21512,
+      "grad_norm": 1.2700646163007345,
+      "learning_rate": 0.003,
+      "loss": 3.9612,
+      "step": 21512
+    },
+    {
+      "epoch": 0.21513,
+      "grad_norm": 1.192192636795088,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 21513
+    },
+    {
+      "epoch": 0.21514,
+      "grad_norm": 1.566529771381087,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 21514
+    },
+    {
+      "epoch": 0.21515,
+      "grad_norm": 1.2346913291823813,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 21515
+    },
+    {
+      "epoch": 0.21516,
+      "grad_norm": 1.567897815090988,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 21516
+    },
+    {
+      "epoch": 0.21517,
+      "grad_norm": 1.0617164953336868,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 21517
+    },
+    {
+      "epoch": 0.21518,
+      "grad_norm": 1.2683577720991326,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 21518
+    },
+    {
+      "epoch": 0.21519,
+      "grad_norm": 1.3018919985938366,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 21519
+    },
+    {
+      "epoch": 0.2152,
+      "grad_norm": 1.3055454741378032,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 21520
+    },
+    {
+      "epoch": 0.21521,
+      "grad_norm": 1.362220291414442,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 21521
+    },
+    {
+      "epoch": 0.21522,
+      "grad_norm": 1.162003439266571,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 21522
+    },
+    {
+      "epoch": 0.21523,
+      "grad_norm": 1.3806136621438434,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 21523
+    },
+    {
+      "epoch": 0.21524,
+      "grad_norm": 1.1756719701949396,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 21524
+    },
+    {
+      "epoch": 0.21525,
+      "grad_norm": 1.6002550402263216,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 21525
+    },
+    {
+      "epoch": 0.21526,
+      "grad_norm": 0.9510201251373374,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 21526
+    },
+    {
+      "epoch": 0.21527,
+      "grad_norm": 1.4230316184313634,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 21527
+    },
+    {
+      "epoch": 0.21528,
+      "grad_norm": 1.2252281393771665,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 21528
+    },
+    {
+      "epoch": 0.21529,
+      "grad_norm": 1.5926361397217603,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 21529
+    },
+    {
+      "epoch": 0.2153,
+      "grad_norm": 1.3812537596578873,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 21530
+    },
+    {
+      "epoch": 0.21531,
+      "grad_norm": 1.1737513610992676,
+      "learning_rate": 0.003,
+      "loss": 3.977,
+      "step": 21531
+    },
+    {
+      "epoch": 0.21532,
+      "grad_norm": 1.2391919663735265,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 21532
+    },
+    {
+      "epoch": 0.21533,
+      "grad_norm": 1.3387340929685454,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 21533
+    },
+    {
+      "epoch": 0.21534,
+      "grad_norm": 1.4085696577884634,
+      "learning_rate": 0.003,
+      "loss": 3.9931,
+      "step": 21534
+    },
+    {
+      "epoch": 0.21535,
+      "grad_norm": 1.287757664032639,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 21535
+    },
+    {
+      "epoch": 0.21536,
+      "grad_norm": 1.2367610458982665,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 21536
+    },
+    {
+      "epoch": 0.21537,
+      "grad_norm": 1.1791147699146494,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 21537
+    },
+    {
+      "epoch": 0.21538,
+      "grad_norm": 1.3103459315528823,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 21538
+    },
+    {
+      "epoch": 0.21539,
+      "grad_norm": 1.1040274078506163,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 21539
+    },
+    {
+      "epoch": 0.2154,
+      "grad_norm": 1.4766377659183196,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 21540
+    },
+    {
+      "epoch": 0.21541,
+      "grad_norm": 1.1156256372156026,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 21541
+    },
+    {
+      "epoch": 0.21542,
+      "grad_norm": 1.4069418166838936,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 21542
+    },
+    {
+      "epoch": 0.21543,
+      "grad_norm": 1.1247318380739004,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 21543
+    },
+    {
+      "epoch": 0.21544,
+      "grad_norm": 1.355167105477023,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 21544
+    },
+    {
+      "epoch": 0.21545,
+      "grad_norm": 1.3337309122264525,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 21545
+    },
+    {
+      "epoch": 0.21546,
+      "grad_norm": 1.0436121273661982,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 21546
+    },
+    {
+      "epoch": 0.21547,
+      "grad_norm": 1.7015641085513726,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 21547
+    },
+    {
+      "epoch": 0.21548,
+      "grad_norm": 1.175123400015565,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 21548
+    },
+    {
+      "epoch": 0.21549,
+      "grad_norm": 1.3312914687925124,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 21549
+    },
+    {
+      "epoch": 0.2155,
+      "grad_norm": 1.1516290689968334,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 21550
+    },
+    {
+      "epoch": 0.21551,
+      "grad_norm": 1.28333153594318,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 21551
+    },
+    {
+      "epoch": 0.21552,
+      "grad_norm": 1.3364300882430293,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 21552
+    },
+    {
+      "epoch": 0.21553,
+      "grad_norm": 1.4340714484331327,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 21553
+    },
+    {
+      "epoch": 0.21554,
+      "grad_norm": 1.3014945862086704,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 21554
+    },
+    {
+      "epoch": 0.21555,
+      "grad_norm": 1.3793039844586237,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 21555
+    },
+    {
+      "epoch": 0.21556,
+      "grad_norm": 1.292337084154698,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 21556
+    },
+    {
+      "epoch": 0.21557,
+      "grad_norm": 1.3012859842430289,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 21557
+    },
+    {
+      "epoch": 0.21558,
+      "grad_norm": 1.4466265968979954,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 21558
+    },
+    {
+      "epoch": 0.21559,
+      "grad_norm": 1.201840963312194,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 21559
+    },
+    {
+      "epoch": 0.2156,
+      "grad_norm": 1.1252968897872588,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 21560
+    },
+    {
+      "epoch": 0.21561,
+      "grad_norm": 1.3188106793046008,
+      "learning_rate": 0.003,
+      "loss": 4.0381,
+      "step": 21561
+    },
+    {
+      "epoch": 0.21562,
+      "grad_norm": 1.4399410091928935,
+      "learning_rate": 0.003,
+      "loss": 4.0322,
+      "step": 21562
+    },
+    {
+      "epoch": 0.21563,
+      "grad_norm": 1.3050535870583118,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 21563
+    },
+    {
+      "epoch": 0.21564,
+      "grad_norm": 1.2325796186719207,
+      "learning_rate": 0.003,
+      "loss": 3.9804,
+      "step": 21564
+    },
+    {
+      "epoch": 0.21565,
+      "grad_norm": 1.1871720632493905,
+      "learning_rate": 0.003,
+      "loss": 4.0387,
+      "step": 21565
+    },
+    {
+      "epoch": 0.21566,
+      "grad_norm": 1.2980626768718073,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 21566
+    },
+    {
+      "epoch": 0.21567,
+      "grad_norm": 1.3120457358261468,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 21567
+    },
+    {
+      "epoch": 0.21568,
+      "grad_norm": 1.2460441177312536,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 21568
+    },
+    {
+      "epoch": 0.21569,
+      "grad_norm": 1.2384229219806921,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 21569
+    },
+    {
+      "epoch": 0.2157,
+      "grad_norm": 1.269190754788751,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 21570
+    },
+    {
+      "epoch": 0.21571,
+      "grad_norm": 1.07270538026013,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 21571
+    },
+    {
+      "epoch": 0.21572,
+      "grad_norm": 1.494984254269472,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 21572
+    },
+    {
+      "epoch": 0.21573,
+      "grad_norm": 1.5219928963083493,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 21573
+    },
+    {
+      "epoch": 0.21574,
+      "grad_norm": 1.2984546593376043,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 21574
+    },
+    {
+      "epoch": 0.21575,
+      "grad_norm": 1.35461997606962,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 21575
+    },
+    {
+      "epoch": 0.21576,
+      "grad_norm": 1.3005928166211627,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 21576
+    },
+    {
+      "epoch": 0.21577,
+      "grad_norm": 1.3210686653827426,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 21577
+    },
+    {
+      "epoch": 0.21578,
+      "grad_norm": 1.2110012137728905,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 21578
+    },
+    {
+      "epoch": 0.21579,
+      "grad_norm": 1.4269860763663738,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 21579
+    },
+    {
+      "epoch": 0.2158,
+      "grad_norm": 1.1662932840226656,
+      "learning_rate": 0.003,
+      "loss": 3.9741,
+      "step": 21580
+    },
+    {
+      "epoch": 0.21581,
+      "grad_norm": 1.3856283334182,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 21581
+    },
+    {
+      "epoch": 0.21582,
+      "grad_norm": 1.3301065045099503,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 21582
+    },
+    {
+      "epoch": 0.21583,
+      "grad_norm": 1.1895275616563454,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 21583
+    },
+    {
+      "epoch": 0.21584,
+      "grad_norm": 1.3457124049635305,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 21584
+    },
+    {
+      "epoch": 0.21585,
+      "grad_norm": 1.1499220470680713,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 21585
+    },
+    {
+      "epoch": 0.21586,
+      "grad_norm": 1.3402306440082998,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 21586
+    },
+    {
+      "epoch": 0.21587,
+      "grad_norm": 1.0066198572824352,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 21587
+    },
+    {
+      "epoch": 0.21588,
+      "grad_norm": 1.5583470280103575,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 21588
+    },
+    {
+      "epoch": 0.21589,
+      "grad_norm": 1.2200907213305383,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 21589
+    },
+    {
+      "epoch": 0.2159,
+      "grad_norm": 1.252556496702691,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 21590
+    },
+    {
+      "epoch": 0.21591,
+      "grad_norm": 1.2638632006359072,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 21591
+    },
+    {
+      "epoch": 0.21592,
+      "grad_norm": 1.3350262491666196,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 21592
+    },
+    {
+      "epoch": 0.21593,
+      "grad_norm": 1.0951232387339365,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 21593
+    },
+    {
+      "epoch": 0.21594,
+      "grad_norm": 1.2333193009867252,
+      "learning_rate": 0.003,
+      "loss": 3.9789,
+      "step": 21594
+    },
+    {
+      "epoch": 0.21595,
+      "grad_norm": 1.2664374852775644,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 21595
+    },
+    {
+      "epoch": 0.21596,
+      "grad_norm": 1.3239022084911074,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 21596
+    },
+    {
+      "epoch": 0.21597,
+      "grad_norm": 1.416198013508405,
+      "learning_rate": 0.003,
+      "loss": 3.9875,
+      "step": 21597
+    },
+    {
+      "epoch": 0.21598,
+      "grad_norm": 1.2537323869749861,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 21598
+    },
+    {
+      "epoch": 0.21599,
+      "grad_norm": 1.391116233453997,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 21599
+    },
+    {
+      "epoch": 0.216,
+      "grad_norm": 1.3079907275182432,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 21600
+    },
+    {
+      "epoch": 0.21601,
+      "grad_norm": 1.0873175064541083,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 21601
+    },
+    {
+      "epoch": 0.21602,
+      "grad_norm": 1.5935660122574395,
+      "learning_rate": 0.003,
+      "loss": 3.9635,
+      "step": 21602
+    },
+    {
+      "epoch": 0.21603,
+      "grad_norm": 1.0121143826910628,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 21603
+    },
+    {
+      "epoch": 0.21604,
+      "grad_norm": 1.7148121435561192,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 21604
+    },
+    {
+      "epoch": 0.21605,
+      "grad_norm": 0.9726324244427823,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 21605
+    },
+    {
+      "epoch": 0.21606,
+      "grad_norm": 1.4420878431916702,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 21606
+    },
+    {
+      "epoch": 0.21607,
+      "grad_norm": 1.3060035380141735,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 21607
+    },
+    {
+      "epoch": 0.21608,
+      "grad_norm": 1.1754846501447693,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 21608
+    },
+    {
+      "epoch": 0.21609,
+      "grad_norm": 1.3342871543174484,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 21609
+    },
+    {
+      "epoch": 0.2161,
+      "grad_norm": 1.374608092298604,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 21610
+    },
+    {
+      "epoch": 0.21611,
+      "grad_norm": 1.20039605615043,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 21611
+    },
+    {
+      "epoch": 0.21612,
+      "grad_norm": 1.3568860773316367,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 21612
+    },
+    {
+      "epoch": 0.21613,
+      "grad_norm": 1.2733747858679545,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 21613
+    },
+    {
+      "epoch": 0.21614,
+      "grad_norm": 1.2305176558244306,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 21614
+    },
+    {
+      "epoch": 0.21615,
+      "grad_norm": 1.4160324108799682,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 21615
+    },
+    {
+      "epoch": 0.21616,
+      "grad_norm": 1.1310488820607207,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 21616
+    },
+    {
+      "epoch": 0.21617,
+      "grad_norm": 1.8259809849359045,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 21617
+    },
+    {
+      "epoch": 0.21618,
+      "grad_norm": 1.051851488393845,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 21618
+    },
+    {
+      "epoch": 0.21619,
+      "grad_norm": 1.4688683843210053,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 21619
+    },
+    {
+      "epoch": 0.2162,
+      "grad_norm": 1.0974349810657484,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 21620
+    },
+    {
+      "epoch": 0.21621,
+      "grad_norm": 1.3610626807784518,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 21621
+    },
+    {
+      "epoch": 0.21622,
+      "grad_norm": 1.2355274722346339,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 21622
+    },
+    {
+      "epoch": 0.21623,
+      "grad_norm": 1.33178724761873,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 21623
+    },
+    {
+      "epoch": 0.21624,
+      "grad_norm": 1.4603083448858682,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 21624
+    },
+    {
+      "epoch": 0.21625,
+      "grad_norm": 1.1243791989782295,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 21625
+    },
+    {
+      "epoch": 0.21626,
+      "grad_norm": 1.484195067622712,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 21626
+    },
+    {
+      "epoch": 0.21627,
+      "grad_norm": 1.218784181073381,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 21627
+    },
+    {
+      "epoch": 0.21628,
+      "grad_norm": 1.3334051004880332,
+      "learning_rate": 0.003,
+      "loss": 3.9629,
+      "step": 21628
+    },
+    {
+      "epoch": 0.21629,
+      "grad_norm": 1.207501320003813,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 21629
+    },
+    {
+      "epoch": 0.2163,
+      "grad_norm": 1.3392180174201664,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 21630
+    },
+    {
+      "epoch": 0.21631,
+      "grad_norm": 1.2710572671613782,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 21631
+    },
+    {
+      "epoch": 0.21632,
+      "grad_norm": 1.3290913698237814,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 21632
+    },
+    {
+      "epoch": 0.21633,
+      "grad_norm": 1.269992234423413,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 21633
+    },
+    {
+      "epoch": 0.21634,
+      "grad_norm": 1.206141856243168,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 21634
+    },
+    {
+      "epoch": 0.21635,
+      "grad_norm": 1.3928225677791977,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 21635
+    },
+    {
+      "epoch": 0.21636,
+      "grad_norm": 1.2005486039226834,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 21636
+    },
+    {
+      "epoch": 0.21637,
+      "grad_norm": 1.3728159047856086,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 21637
+    },
+    {
+      "epoch": 0.21638,
+      "grad_norm": 1.0955566158279648,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 21638
+    },
+    {
+      "epoch": 0.21639,
+      "grad_norm": 1.5940527478748334,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 21639
+    },
+    {
+      "epoch": 0.2164,
+      "grad_norm": 1.0102210518984438,
+      "learning_rate": 0.003,
+      "loss": 3.9752,
+      "step": 21640
+    },
+    {
+      "epoch": 0.21641,
+      "grad_norm": 1.415137545108236,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 21641
+    },
+    {
+      "epoch": 0.21642,
+      "grad_norm": 1.4277910850917153,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 21642
+    },
+    {
+      "epoch": 0.21643,
+      "grad_norm": 1.2487741429658454,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 21643
+    },
+    {
+      "epoch": 0.21644,
+      "grad_norm": 1.5327888142924717,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 21644
+    },
+    {
+      "epoch": 0.21645,
+      "grad_norm": 1.2506162821676914,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 21645
+    },
+    {
+      "epoch": 0.21646,
+      "grad_norm": 1.5241257750209818,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 21646
+    },
+    {
+      "epoch": 0.21647,
+      "grad_norm": 1.197381169980551,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 21647
+    },
+    {
+      "epoch": 0.21648,
+      "grad_norm": 1.185695004725597,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 21648
+    },
+    {
+      "epoch": 0.21649,
+      "grad_norm": 1.4550391037823611,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 21649
+    },
+    {
+      "epoch": 0.2165,
+      "grad_norm": 1.2073659233470535,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 21650
+    },
+    {
+      "epoch": 0.21651,
+      "grad_norm": 1.3404599948545233,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 21651
+    },
+    {
+      "epoch": 0.21652,
+      "grad_norm": 1.20061095623674,
+      "learning_rate": 0.003,
+      "loss": 3.9679,
+      "step": 21652
+    },
+    {
+      "epoch": 0.21653,
+      "grad_norm": 1.2169502751387722,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 21653
+    },
+    {
+      "epoch": 0.21654,
+      "grad_norm": 1.3700323706590056,
+      "learning_rate": 0.003,
+      "loss": 3.9812,
+      "step": 21654
+    },
+    {
+      "epoch": 0.21655,
+      "grad_norm": 1.1127266599738366,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 21655
+    },
+    {
+      "epoch": 0.21656,
+      "grad_norm": 1.402723000731283,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 21656
+    },
+    {
+      "epoch": 0.21657,
+      "grad_norm": 0.9826649562830617,
+      "learning_rate": 0.003,
+      "loss": 3.9834,
+      "step": 21657
+    },
+    {
+      "epoch": 0.21658,
+      "grad_norm": 1.8237487885515333,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 21658
+    },
+    {
+      "epoch": 0.21659,
+      "grad_norm": 1.2025173879128868,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 21659
+    },
+    {
+      "epoch": 0.2166,
+      "grad_norm": 1.3851675760367717,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 21660
+    },
+    {
+      "epoch": 0.21661,
+      "grad_norm": 1.30111935773009,
+      "learning_rate": 0.003,
+      "loss": 4.033,
+      "step": 21661
+    },
+    {
+      "epoch": 0.21662,
+      "grad_norm": 1.213149374103388,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 21662
+    },
+    {
+      "epoch": 0.21663,
+      "grad_norm": 1.444962349335395,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 21663
+    },
+    {
+      "epoch": 0.21664,
+      "grad_norm": 1.206997630380186,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 21664
+    },
+    {
+      "epoch": 0.21665,
+      "grad_norm": 1.2379710826969892,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 21665
+    },
+    {
+      "epoch": 0.21666,
+      "grad_norm": 1.397976312571475,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 21666
+    },
+    {
+      "epoch": 0.21667,
+      "grad_norm": 1.1937125362065113,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 21667
+    },
+    {
+      "epoch": 0.21668,
+      "grad_norm": 1.2490990243407358,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 21668
+    },
+    {
+      "epoch": 0.21669,
+      "grad_norm": 1.4043922338296373,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 21669
+    },
+    {
+      "epoch": 0.2167,
+      "grad_norm": 1.2615751481824282,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 21670
+    },
+    {
+      "epoch": 0.21671,
+      "grad_norm": 1.344356231908855,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 21671
+    },
+    {
+      "epoch": 0.21672,
+      "grad_norm": 1.1400790721660696,
+      "learning_rate": 0.003,
+      "loss": 3.9669,
+      "step": 21672
+    },
+    {
+      "epoch": 0.21673,
+      "grad_norm": 1.5424540301549963,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 21673
+    },
+    {
+      "epoch": 0.21674,
+      "grad_norm": 1.0435686313764396,
+      "learning_rate": 0.003,
+      "loss": 3.9679,
+      "step": 21674
+    },
+    {
+      "epoch": 0.21675,
+      "grad_norm": 1.5350962853554635,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 21675
+    },
+    {
+      "epoch": 0.21676,
+      "grad_norm": 1.197188073620894,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 21676
+    },
+    {
+      "epoch": 0.21677,
+      "grad_norm": 1.3310557359090402,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 21677
+    },
+    {
+      "epoch": 0.21678,
+      "grad_norm": 1.3483813696140483,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 21678
+    },
+    {
+      "epoch": 0.21679,
+      "grad_norm": 1.1729218098364484,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 21679
+    },
+    {
+      "epoch": 0.2168,
+      "grad_norm": 1.3972695166124591,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 21680
+    },
+    {
+      "epoch": 0.21681,
+      "grad_norm": 1.3127849633881548,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 21681
+    },
+    {
+      "epoch": 0.21682,
+      "grad_norm": 1.1370627065021182,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 21682
+    },
+    {
+      "epoch": 0.21683,
+      "grad_norm": 1.2479174395510761,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 21683
+    },
+    {
+      "epoch": 0.21684,
+      "grad_norm": 1.136086103823961,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 21684
+    },
+    {
+      "epoch": 0.21685,
+      "grad_norm": 1.6604438283201557,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 21685
+    },
+    {
+      "epoch": 0.21686,
+      "grad_norm": 0.9994540058684338,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 21686
+    },
+    {
+      "epoch": 0.21687,
+      "grad_norm": 1.4006978735705777,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 21687
+    },
+    {
+      "epoch": 0.21688,
+      "grad_norm": 1.1887855248626609,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 21688
+    },
+    {
+      "epoch": 0.21689,
+      "grad_norm": 1.7287476311703591,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 21689
+    },
+    {
+      "epoch": 0.2169,
+      "grad_norm": 1.2772627880386433,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 21690
+    },
+    {
+      "epoch": 0.21691,
+      "grad_norm": 1.1441100272120863,
+      "learning_rate": 0.003,
+      "loss": 3.9863,
+      "step": 21691
+    },
+    {
+      "epoch": 0.21692,
+      "grad_norm": 1.2524948429553053,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 21692
+    },
+    {
+      "epoch": 0.21693,
+      "grad_norm": 1.2964441258486796,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 21693
+    },
+    {
+      "epoch": 0.21694,
+      "grad_norm": 1.4187669456699001,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 21694
+    },
+    {
+      "epoch": 0.21695,
+      "grad_norm": 1.2401018867081617,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 21695
+    },
+    {
+      "epoch": 0.21696,
+      "grad_norm": 1.3213689642653372,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 21696
+    },
+    {
+      "epoch": 0.21697,
+      "grad_norm": 1.250095142155684,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 21697
+    },
+    {
+      "epoch": 0.21698,
+      "grad_norm": 1.2342677618775901,
+      "learning_rate": 0.003,
+      "loss": 3.9603,
+      "step": 21698
+    },
+    {
+      "epoch": 0.21699,
+      "grad_norm": 1.1763537298729496,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 21699
+    },
+    {
+      "epoch": 0.217,
+      "grad_norm": 1.1624684839726565,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 21700
+    },
+    {
+      "epoch": 0.21701,
+      "grad_norm": 1.3582490003593168,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 21701
+    },
+    {
+      "epoch": 0.21702,
+      "grad_norm": 1.2189610658498278,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 21702
+    },
+    {
+      "epoch": 0.21703,
+      "grad_norm": 1.400861181711102,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 21703
+    },
+    {
+      "epoch": 0.21704,
+      "grad_norm": 1.3793076427971191,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 21704
+    },
+    {
+      "epoch": 0.21705,
+      "grad_norm": 1.2547707154100634,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 21705
+    },
+    {
+      "epoch": 0.21706,
+      "grad_norm": 1.422807917028276,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 21706
+    },
+    {
+      "epoch": 0.21707,
+      "grad_norm": 1.2631625810418166,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 21707
+    },
+    {
+      "epoch": 0.21708,
+      "grad_norm": 1.5388596405107844,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 21708
+    },
+    {
+      "epoch": 0.21709,
+      "grad_norm": 0.9160040309264672,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 21709
+    },
+    {
+      "epoch": 0.2171,
+      "grad_norm": 1.4301554597373987,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 21710
+    },
+    {
+      "epoch": 0.21711,
+      "grad_norm": 1.223623836909467,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 21711
+    },
+    {
+      "epoch": 0.21712,
+      "grad_norm": 1.3822427485303515,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 21712
+    },
+    {
+      "epoch": 0.21713,
+      "grad_norm": 1.0559367677743268,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 21713
+    },
+    {
+      "epoch": 0.21714,
+      "grad_norm": 1.289162450194436,
+      "learning_rate": 0.003,
+      "loss": 3.9822,
+      "step": 21714
+    },
+    {
+      "epoch": 0.21715,
+      "grad_norm": 1.3029986587668727,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 21715
+    },
+    {
+      "epoch": 0.21716,
+      "grad_norm": 1.5030526802190594,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 21716
+    },
+    {
+      "epoch": 0.21717,
+      "grad_norm": 1.2648928391488217,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 21717
+    },
+    {
+      "epoch": 0.21718,
+      "grad_norm": 1.3001179721627285,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 21718
+    },
+    {
+      "epoch": 0.21719,
+      "grad_norm": 1.0463610143181978,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 21719
+    },
+    {
+      "epoch": 0.2172,
+      "grad_norm": 1.3993417369971985,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 21720
+    },
+    {
+      "epoch": 0.21721,
+      "grad_norm": 1.100640650540458,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 21721
+    },
+    {
+      "epoch": 0.21722,
+      "grad_norm": 1.3661680298866732,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 21722
+    },
+    {
+      "epoch": 0.21723,
+      "grad_norm": 1.2823271976456876,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 21723
+    },
+    {
+      "epoch": 0.21724,
+      "grad_norm": 1.4231791470111517,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 21724
+    },
+    {
+      "epoch": 0.21725,
+      "grad_norm": 1.2870024886802647,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 21725
+    },
+    {
+      "epoch": 0.21726,
+      "grad_norm": 1.20542877042387,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 21726
+    },
+    {
+      "epoch": 0.21727,
+      "grad_norm": 1.2826568651715116,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 21727
+    },
+    {
+      "epoch": 0.21728,
+      "grad_norm": 1.2352665442852366,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 21728
+    },
+    {
+      "epoch": 0.21729,
+      "grad_norm": 1.2154328135228454,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 21729
+    },
+    {
+      "epoch": 0.2173,
+      "grad_norm": 1.2418098423770925,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 21730
+    },
+    {
+      "epoch": 0.21731,
+      "grad_norm": 1.1875155243886,
+      "learning_rate": 0.003,
+      "loss": 4.0473,
+      "step": 21731
+    },
+    {
+      "epoch": 0.21732,
+      "grad_norm": 1.262029586192612,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 21732
+    },
+    {
+      "epoch": 0.21733,
+      "grad_norm": 1.4239950145149205,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 21733
+    },
+    {
+      "epoch": 0.21734,
+      "grad_norm": 1.259297756408471,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 21734
+    },
+    {
+      "epoch": 0.21735,
+      "grad_norm": 1.1719438615292317,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 21735
+    },
+    {
+      "epoch": 0.21736,
+      "grad_norm": 1.5409436411221293,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 21736
+    },
+    {
+      "epoch": 0.21737,
+      "grad_norm": 1.1451597065097385,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 21737
+    },
+    {
+      "epoch": 0.21738,
+      "grad_norm": 1.279614585746944,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 21738
+    },
+    {
+      "epoch": 0.21739,
+      "grad_norm": 1.2966456076805943,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 21739
+    },
+    {
+      "epoch": 0.2174,
+      "grad_norm": 1.4656034760074008,
+      "learning_rate": 0.003,
+      "loss": 3.9753,
+      "step": 21740
+    },
+    {
+      "epoch": 0.21741,
+      "grad_norm": 1.4479869247362216,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 21741
+    },
+    {
+      "epoch": 0.21742,
+      "grad_norm": 1.1848165729106541,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 21742
+    },
+    {
+      "epoch": 0.21743,
+      "grad_norm": 1.3840384365755485,
+      "learning_rate": 0.003,
+      "loss": 4.0378,
+      "step": 21743
+    },
+    {
+      "epoch": 0.21744,
+      "grad_norm": 1.279590145644443,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 21744
+    },
+    {
+      "epoch": 0.21745,
+      "grad_norm": 1.3789629070982803,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 21745
+    },
+    {
+      "epoch": 0.21746,
+      "grad_norm": 1.0540940977160531,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 21746
+    },
+    {
+      "epoch": 0.21747,
+      "grad_norm": 1.5730727187948093,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 21747
+    },
+    {
+      "epoch": 0.21748,
+      "grad_norm": 1.1764800514808005,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 21748
+    },
+    {
+      "epoch": 0.21749,
+      "grad_norm": 1.2460213606544117,
+      "learning_rate": 0.003,
+      "loss": 3.9792,
+      "step": 21749
+    },
+    {
+      "epoch": 0.2175,
+      "grad_norm": 1.3122230070799068,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 21750
+    },
+    {
+      "epoch": 0.21751,
+      "grad_norm": 1.141371455852024,
+      "learning_rate": 0.003,
+      "loss": 3.9686,
+      "step": 21751
+    },
+    {
+      "epoch": 0.21752,
+      "grad_norm": 1.4919469282401676,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 21752
+    },
+    {
+      "epoch": 0.21753,
+      "grad_norm": 1.1735280521429639,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 21753
+    },
+    {
+      "epoch": 0.21754,
+      "grad_norm": 1.427043387774454,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 21754
+    },
+    {
+      "epoch": 0.21755,
+      "grad_norm": 1.2779868785226811,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 21755
+    },
+    {
+      "epoch": 0.21756,
+      "grad_norm": 1.2548683750358114,
+      "learning_rate": 0.003,
+      "loss": 3.9704,
+      "step": 21756
+    },
+    {
+      "epoch": 0.21757,
+      "grad_norm": 1.2679058514990853,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 21757
+    },
+    {
+      "epoch": 0.21758,
+      "grad_norm": 1.303131517984688,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 21758
+    },
+    {
+      "epoch": 0.21759,
+      "grad_norm": 1.331985687637226,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 21759
+    },
+    {
+      "epoch": 0.2176,
+      "grad_norm": 1.4031679322445785,
+      "learning_rate": 0.003,
+      "loss": 3.9819,
+      "step": 21760
+    },
+    {
+      "epoch": 0.21761,
+      "grad_norm": 1.1076077714180101,
+      "learning_rate": 0.003,
+      "loss": 3.9726,
+      "step": 21761
+    },
+    {
+      "epoch": 0.21762,
+      "grad_norm": 1.3687737150539718,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 21762
+    },
+    {
+      "epoch": 0.21763,
+      "grad_norm": 1.304071379845354,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 21763
+    },
+    {
+      "epoch": 0.21764,
+      "grad_norm": 1.323771094154932,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 21764
+    },
+    {
+      "epoch": 0.21765,
+      "grad_norm": 1.1774993312902868,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 21765
+    },
+    {
+      "epoch": 0.21766,
+      "grad_norm": 1.327044182245997,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 21766
+    },
+    {
+      "epoch": 0.21767,
+      "grad_norm": 1.1776694552478215,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 21767
+    },
+    {
+      "epoch": 0.21768,
+      "grad_norm": 1.223011154706187,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 21768
+    },
+    {
+      "epoch": 0.21769,
+      "grad_norm": 1.3694764412911384,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 21769
+    },
+    {
+      "epoch": 0.2177,
+      "grad_norm": 1.1699432890161034,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 21770
+    },
+    {
+      "epoch": 0.21771,
+      "grad_norm": 1.5418599018135937,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 21771
+    },
+    {
+      "epoch": 0.21772,
+      "grad_norm": 1.0820871844775757,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 21772
+    },
+    {
+      "epoch": 0.21773,
+      "grad_norm": 1.3481451355140062,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 21773
+    },
+    {
+      "epoch": 0.21774,
+      "grad_norm": 1.1254900894759898,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 21774
+    },
+    {
+      "epoch": 0.21775,
+      "grad_norm": 1.264138099729919,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 21775
+    },
+    {
+      "epoch": 0.21776,
+      "grad_norm": 1.3801388677498272,
+      "learning_rate": 0.003,
+      "loss": 3.9875,
+      "step": 21776
+    },
+    {
+      "epoch": 0.21777,
+      "grad_norm": 1.0510350951036482,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 21777
+    },
+    {
+      "epoch": 0.21778,
+      "grad_norm": 1.5313579050896282,
+      "learning_rate": 0.003,
+      "loss": 3.9788,
+      "step": 21778
+    },
+    {
+      "epoch": 0.21779,
+      "grad_norm": 1.164113603328975,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 21779
+    },
+    {
+      "epoch": 0.2178,
+      "grad_norm": 1.3670399105287572,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 21780
+    },
+    {
+      "epoch": 0.21781,
+      "grad_norm": 1.2770534705101182,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 21781
+    },
+    {
+      "epoch": 0.21782,
+      "grad_norm": 1.1582309497073513,
+      "learning_rate": 0.003,
+      "loss": 3.9711,
+      "step": 21782
+    },
+    {
+      "epoch": 0.21783,
+      "grad_norm": 1.4611288408518515,
+      "learning_rate": 0.003,
+      "loss": 3.9811,
+      "step": 21783
+    },
+    {
+      "epoch": 0.21784,
+      "grad_norm": 0.9649134082889295,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 21784
+    },
+    {
+      "epoch": 0.21785,
+      "grad_norm": 1.511684577494886,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 21785
+    },
+    {
+      "epoch": 0.21786,
+      "grad_norm": 1.2281489538221246,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 21786
+    },
+    {
+      "epoch": 0.21787,
+      "grad_norm": 1.6143017303562976,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 21787
+    },
+    {
+      "epoch": 0.21788,
+      "grad_norm": 1.0468105754483152,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 21788
+    },
+    {
+      "epoch": 0.21789,
+      "grad_norm": 1.646682094880902,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 21789
+    },
+    {
+      "epoch": 0.2179,
+      "grad_norm": 1.23610096027204,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 21790
+    },
+    {
+      "epoch": 0.21791,
+      "grad_norm": 1.4794489125673145,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 21791
+    },
+    {
+      "epoch": 0.21792,
+      "grad_norm": 1.3440837333136029,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 21792
+    },
+    {
+      "epoch": 0.21793,
+      "grad_norm": 1.179499030021326,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 21793
+    },
+    {
+      "epoch": 0.21794,
+      "grad_norm": 1.2834508849931736,
+      "learning_rate": 0.003,
+      "loss": 4.0239,
+      "step": 21794
+    },
+    {
+      "epoch": 0.21795,
+      "grad_norm": 1.3732880876621187,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 21795
+    },
+    {
+      "epoch": 0.21796,
+      "grad_norm": 1.2558356220196722,
+      "learning_rate": 0.003,
+      "loss": 3.959,
+      "step": 21796
+    },
+    {
+      "epoch": 0.21797,
+      "grad_norm": 1.2816576598999312,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 21797
+    },
+    {
+      "epoch": 0.21798,
+      "grad_norm": 1.2503085835796202,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 21798
+    },
+    {
+      "epoch": 0.21799,
+      "grad_norm": 1.4029705768851444,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 21799
+    },
+    {
+      "epoch": 0.218,
+      "grad_norm": 1.4366730044785514,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 21800
+    },
+    {
+      "epoch": 0.21801,
+      "grad_norm": 1.1694798321117736,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 21801
+    },
+    {
+      "epoch": 0.21802,
+      "grad_norm": 1.283462284994241,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 21802
+    },
+    {
+      "epoch": 0.21803,
+      "grad_norm": 1.140706760661062,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 21803
+    },
+    {
+      "epoch": 0.21804,
+      "grad_norm": 1.453924527744494,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 21804
+    },
+    {
+      "epoch": 0.21805,
+      "grad_norm": 1.1912356903732884,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 21805
+    },
+    {
+      "epoch": 0.21806,
+      "grad_norm": 1.2334749485556487,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 21806
+    },
+    {
+      "epoch": 0.21807,
+      "grad_norm": 1.4471940941305517,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 21807
+    },
+    {
+      "epoch": 0.21808,
+      "grad_norm": 1.2353815586720716,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 21808
+    },
+    {
+      "epoch": 0.21809,
+      "grad_norm": 1.5224623562827944,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 21809
+    },
+    {
+      "epoch": 0.2181,
+      "grad_norm": 1.2077071070295733,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 21810
+    },
+    {
+      "epoch": 0.21811,
+      "grad_norm": 1.300146554824162,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 21811
+    },
+    {
+      "epoch": 0.21812,
+      "grad_norm": 1.1658697738093449,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 21812
+    },
+    {
+      "epoch": 0.21813,
+      "grad_norm": 1.5420504293605335,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 21813
+    },
+    {
+      "epoch": 0.21814,
+      "grad_norm": 1.0078477061920392,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 21814
+    },
+    {
+      "epoch": 0.21815,
+      "grad_norm": 1.6374236407535403,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 21815
+    },
+    {
+      "epoch": 0.21816,
+      "grad_norm": 1.0687861990142336,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 21816
+    },
+    {
+      "epoch": 0.21817,
+      "grad_norm": 1.4155436050833432,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 21817
+    },
+    {
+      "epoch": 0.21818,
+      "grad_norm": 1.1651205988438953,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 21818
+    },
+    {
+      "epoch": 0.21819,
+      "grad_norm": 1.261813264029156,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 21819
+    },
+    {
+      "epoch": 0.2182,
+      "grad_norm": 1.0838312480286736,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 21820
+    },
+    {
+      "epoch": 0.21821,
+      "grad_norm": 1.3637360642604501,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 21821
+    },
+    {
+      "epoch": 0.21822,
+      "grad_norm": 1.2480589047861015,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 21822
+    },
+    {
+      "epoch": 0.21823,
+      "grad_norm": 1.4010219198556428,
+      "learning_rate": 0.003,
+      "loss": 4.0429,
+      "step": 21823
+    },
+    {
+      "epoch": 0.21824,
+      "grad_norm": 1.2839359085136266,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 21824
+    },
+    {
+      "epoch": 0.21825,
+      "grad_norm": 1.240245025249499,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 21825
+    },
+    {
+      "epoch": 0.21826,
+      "grad_norm": 1.1938548881586089,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 21826
+    },
+    {
+      "epoch": 0.21827,
+      "grad_norm": 1.376678794029911,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 21827
+    },
+    {
+      "epoch": 0.21828,
+      "grad_norm": 1.118088098619449,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 21828
+    },
+    {
+      "epoch": 0.21829,
+      "grad_norm": 1.5572990507421491,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 21829
+    },
+    {
+      "epoch": 0.2183,
+      "grad_norm": 1.2531447666936237,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 21830
+    },
+    {
+      "epoch": 0.21831,
+      "grad_norm": 1.4664059698650174,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 21831
+    },
+    {
+      "epoch": 0.21832,
+      "grad_norm": 1.0316312311668534,
+      "learning_rate": 0.003,
+      "loss": 3.9765,
+      "step": 21832
+    },
+    {
+      "epoch": 0.21833,
+      "grad_norm": 1.445862037602515,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 21833
+    },
+    {
+      "epoch": 0.21834,
+      "grad_norm": 1.2108963402547028,
+      "learning_rate": 0.003,
+      "loss": 4.0288,
+      "step": 21834
+    },
+    {
+      "epoch": 0.21835,
+      "grad_norm": 1.3040656237630401,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 21835
+    },
+    {
+      "epoch": 0.21836,
+      "grad_norm": 1.1798936595699987,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 21836
+    },
+    {
+      "epoch": 0.21837,
+      "grad_norm": 1.3679458985709623,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 21837
+    },
+    {
+      "epoch": 0.21838,
+      "grad_norm": 1.2198754453964504,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 21838
+    },
+    {
+      "epoch": 0.21839,
+      "grad_norm": 1.4343483708140523,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 21839
+    },
+    {
+      "epoch": 0.2184,
+      "grad_norm": 1.0338172750567736,
+      "learning_rate": 0.003,
+      "loss": 3.9749,
+      "step": 21840
+    },
+    {
+      "epoch": 0.21841,
+      "grad_norm": 1.7518111031861696,
+      "learning_rate": 0.003,
+      "loss": 4.0496,
+      "step": 21841
+    },
+    {
+      "epoch": 0.21842,
+      "grad_norm": 1.0598038543555905,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 21842
+    },
+    {
+      "epoch": 0.21843,
+      "grad_norm": 1.3780903631226455,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 21843
+    },
+    {
+      "epoch": 0.21844,
+      "grad_norm": 1.1884182956063731,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 21844
+    },
+    {
+      "epoch": 0.21845,
+      "grad_norm": 1.4304053281200648,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 21845
+    },
+    {
+      "epoch": 0.21846,
+      "grad_norm": 1.0373177595603593,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 21846
+    },
+    {
+      "epoch": 0.21847,
+      "grad_norm": 1.2571777522053638,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 21847
+    },
+    {
+      "epoch": 0.21848,
+      "grad_norm": 1.2197728784912514,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 21848
+    },
+    {
+      "epoch": 0.21849,
+      "grad_norm": 1.2662971698959744,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 21849
+    },
+    {
+      "epoch": 0.2185,
+      "grad_norm": 1.3204204331438287,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 21850
+    },
+    {
+      "epoch": 0.21851,
+      "grad_norm": 1.2399597268774565,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 21851
+    },
+    {
+      "epoch": 0.21852,
+      "grad_norm": 1.364045186690722,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 21852
+    },
+    {
+      "epoch": 0.21853,
+      "grad_norm": 1.2713390981763568,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 21853
+    },
+    {
+      "epoch": 0.21854,
+      "grad_norm": 1.145328696233498,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 21854
+    },
+    {
+      "epoch": 0.21855,
+      "grad_norm": 1.2711334216375794,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 21855
+    },
+    {
+      "epoch": 0.21856,
+      "grad_norm": 1.1040199474440724,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 21856
+    },
+    {
+      "epoch": 0.21857,
+      "grad_norm": 1.3415364199904953,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 21857
+    },
+    {
+      "epoch": 0.21858,
+      "grad_norm": 1.185378392189486,
+      "learning_rate": 0.003,
+      "loss": 3.9737,
+      "step": 21858
+    },
+    {
+      "epoch": 0.21859,
+      "grad_norm": 1.338292571518052,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 21859
+    },
+    {
+      "epoch": 0.2186,
+      "grad_norm": 1.2046792976741894,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 21860
+    },
+    {
+      "epoch": 0.21861,
+      "grad_norm": 1.291152239939995,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 21861
+    },
+    {
+      "epoch": 0.21862,
+      "grad_norm": 1.166982753658108,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 21862
+    },
+    {
+      "epoch": 0.21863,
+      "grad_norm": 1.1762413169620136,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 21863
+    },
+    {
+      "epoch": 0.21864,
+      "grad_norm": 1.4298673095114875,
+      "learning_rate": 0.003,
+      "loss": 3.9673,
+      "step": 21864
+    },
+    {
+      "epoch": 0.21865,
+      "grad_norm": 1.2215428951006153,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 21865
+    },
+    {
+      "epoch": 0.21866,
+      "grad_norm": 1.4271519651619973,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 21866
+    },
+    {
+      "epoch": 0.21867,
+      "grad_norm": 1.3037291738669112,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 21867
+    },
+    {
+      "epoch": 0.21868,
+      "grad_norm": 1.117337641278464,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 21868
+    },
+    {
+      "epoch": 0.21869,
+      "grad_norm": 1.2402716722577118,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 21869
+    },
+    {
+      "epoch": 0.2187,
+      "grad_norm": 1.2345448525251175,
+      "learning_rate": 0.003,
+      "loss": 3.9769,
+      "step": 21870
+    },
+    {
+      "epoch": 0.21871,
+      "grad_norm": 1.4014862269704604,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 21871
+    },
+    {
+      "epoch": 0.21872,
+      "grad_norm": 1.1402472903200775,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 21872
+    },
+    {
+      "epoch": 0.21873,
+      "grad_norm": 1.225973577807098,
+      "learning_rate": 0.003,
+      "loss": 3.9886,
+      "step": 21873
+    },
+    {
+      "epoch": 0.21874,
+      "grad_norm": 1.3883287258174069,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 21874
+    },
+    {
+      "epoch": 0.21875,
+      "grad_norm": 1.5297479332053998,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 21875
+    },
+    {
+      "epoch": 0.21876,
+      "grad_norm": 1.3241711109601797,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 21876
+    },
+    {
+      "epoch": 0.21877,
+      "grad_norm": 1.0668913234611774,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 21877
+    },
+    {
+      "epoch": 0.21878,
+      "grad_norm": 1.430795499540618,
+      "learning_rate": 0.003,
+      "loss": 3.9808,
+      "step": 21878
+    },
+    {
+      "epoch": 0.21879,
+      "grad_norm": 1.115405545193698,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 21879
+    },
+    {
+      "epoch": 0.2188,
+      "grad_norm": 1.2108572506616062,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 21880
+    },
+    {
+      "epoch": 0.21881,
+      "grad_norm": 1.2240014313139185,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 21881
+    },
+    {
+      "epoch": 0.21882,
+      "grad_norm": 1.2065903733274945,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 21882
+    },
+    {
+      "epoch": 0.21883,
+      "grad_norm": 1.3490610047429985,
+      "learning_rate": 0.003,
+      "loss": 3.9844,
+      "step": 21883
+    },
+    {
+      "epoch": 0.21884,
+      "grad_norm": 1.300317917889717,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 21884
+    },
+    {
+      "epoch": 0.21885,
+      "grad_norm": 1.414366541811956,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 21885
+    },
+    {
+      "epoch": 0.21886,
+      "grad_norm": 1.443208431203201,
+      "learning_rate": 0.003,
+      "loss": 4.0703,
+      "step": 21886
+    },
+    {
+      "epoch": 0.21887,
+      "grad_norm": 1.2319644242573962,
+      "learning_rate": 0.003,
+      "loss": 3.9864,
+      "step": 21887
+    },
+    {
+      "epoch": 0.21888,
+      "grad_norm": 1.2176729559943604,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 21888
+    },
+    {
+      "epoch": 0.21889,
+      "grad_norm": 1.2467573220811374,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 21889
+    },
+    {
+      "epoch": 0.2189,
+      "grad_norm": 1.2081046402129583,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 21890
+    },
+    {
+      "epoch": 0.21891,
+      "grad_norm": 1.2940293170639094,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 21891
+    },
+    {
+      "epoch": 0.21892,
+      "grad_norm": 1.3723069928839458,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 21892
+    },
+    {
+      "epoch": 0.21893,
+      "grad_norm": 1.3301723736497029,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 21893
+    },
+    {
+      "epoch": 0.21894,
+      "grad_norm": 1.185436337982168,
+      "learning_rate": 0.003,
+      "loss": 3.9725,
+      "step": 21894
+    },
+    {
+      "epoch": 0.21895,
+      "grad_norm": 1.1606511887153435,
+      "learning_rate": 0.003,
+      "loss": 3.9877,
+      "step": 21895
+    },
+    {
+      "epoch": 0.21896,
+      "grad_norm": 1.2776651688973892,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 21896
+    },
+    {
+      "epoch": 0.21897,
+      "grad_norm": 1.6254167650798246,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 21897
+    },
+    {
+      "epoch": 0.21898,
+      "grad_norm": 1.1721704587577604,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 21898
+    },
+    {
+      "epoch": 0.21899,
+      "grad_norm": 1.5401132682470466,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 21899
+    },
+    {
+      "epoch": 0.219,
+      "grad_norm": 1.0673158640252267,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 21900
+    },
+    {
+      "epoch": 0.21901,
+      "grad_norm": 1.487340788414237,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 21901
+    },
+    {
+      "epoch": 0.21902,
+      "grad_norm": 1.1975640909149188,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 21902
+    },
+    {
+      "epoch": 0.21903,
+      "grad_norm": 1.3169445653765746,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 21903
+    },
+    {
+      "epoch": 0.21904,
+      "grad_norm": 1.4526398213406417,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 21904
+    },
+    {
+      "epoch": 0.21905,
+      "grad_norm": 1.3010281372906871,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 21905
+    },
+    {
+      "epoch": 0.21906,
+      "grad_norm": 1.3601499751659736,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 21906
+    },
+    {
+      "epoch": 0.21907,
+      "grad_norm": 1.2226197823039024,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 21907
+    },
+    {
+      "epoch": 0.21908,
+      "grad_norm": 1.2856416207852328,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 21908
+    },
+    {
+      "epoch": 0.21909,
+      "grad_norm": 1.1815041754329354,
+      "learning_rate": 0.003,
+      "loss": 3.9859,
+      "step": 21909
+    },
+    {
+      "epoch": 0.2191,
+      "grad_norm": 1.2773475543260076,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 21910
+    },
+    {
+      "epoch": 0.21911,
+      "grad_norm": 1.2078028060180195,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 21911
+    },
+    {
+      "epoch": 0.21912,
+      "grad_norm": 1.2428544505529202,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 21912
+    },
+    {
+      "epoch": 0.21913,
+      "grad_norm": 1.2214572033876574,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 21913
+    },
+    {
+      "epoch": 0.21914,
+      "grad_norm": 1.7431740224135908,
+      "learning_rate": 0.003,
+      "loss": 4.0315,
+      "step": 21914
+    },
+    {
+      "epoch": 0.21915,
+      "grad_norm": 1.1040982132724255,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 21915
+    },
+    {
+      "epoch": 0.21916,
+      "grad_norm": 1.19844725030282,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 21916
+    },
+    {
+      "epoch": 0.21917,
+      "grad_norm": 1.2343599841127029,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 21917
+    },
+    {
+      "epoch": 0.21918,
+      "grad_norm": 1.4708367502788375,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 21918
+    },
+    {
+      "epoch": 0.21919,
+      "grad_norm": 1.1103392226320454,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 21919
+    },
+    {
+      "epoch": 0.2192,
+      "grad_norm": 1.283644044947123,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 21920
+    },
+    {
+      "epoch": 0.21921,
+      "grad_norm": 1.1248574422060396,
+      "learning_rate": 0.003,
+      "loss": 3.9806,
+      "step": 21921
+    },
+    {
+      "epoch": 0.21922,
+      "grad_norm": 1.5172633901063053,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 21922
+    },
+    {
+      "epoch": 0.21923,
+      "grad_norm": 1.2789310587193718,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 21923
+    },
+    {
+      "epoch": 0.21924,
+      "grad_norm": 1.1568626132520783,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 21924
+    },
+    {
+      "epoch": 0.21925,
+      "grad_norm": 1.3820980685709467,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 21925
+    },
+    {
+      "epoch": 0.21926,
+      "grad_norm": 1.026116399024905,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 21926
+    },
+    {
+      "epoch": 0.21927,
+      "grad_norm": 1.41140901858849,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 21927
+    },
+    {
+      "epoch": 0.21928,
+      "grad_norm": 1.1782418701940927,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 21928
+    },
+    {
+      "epoch": 0.21929,
+      "grad_norm": 1.6131010645869583,
+      "learning_rate": 0.003,
+      "loss": 4.0482,
+      "step": 21929
+    },
+    {
+      "epoch": 0.2193,
+      "grad_norm": 1.0560398092073782,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 21930
+    },
+    {
+      "epoch": 0.21931,
+      "grad_norm": 1.6343082471022763,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 21931
+    },
+    {
+      "epoch": 0.21932,
+      "grad_norm": 1.0506307303060547,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 21932
+    },
+    {
+      "epoch": 0.21933,
+      "grad_norm": 1.4997533694840168,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 21933
+    },
+    {
+      "epoch": 0.21934,
+      "grad_norm": 1.3167061695979907,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 21934
+    },
+    {
+      "epoch": 0.21935,
+      "grad_norm": 1.4593656972207476,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 21935
+    },
+    {
+      "epoch": 0.21936,
+      "grad_norm": 1.097834867590188,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 21936
+    },
+    {
+      "epoch": 0.21937,
+      "grad_norm": 1.2185424182037174,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 21937
+    },
+    {
+      "epoch": 0.21938,
+      "grad_norm": 1.2895317110699231,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 21938
+    },
+    {
+      "epoch": 0.21939,
+      "grad_norm": 1.290684280947615,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 21939
+    },
+    {
+      "epoch": 0.2194,
+      "grad_norm": 1.3139205404251693,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 21940
+    },
+    {
+      "epoch": 0.21941,
+      "grad_norm": 1.4452133210796771,
+      "learning_rate": 0.003,
+      "loss": 4.0484,
+      "step": 21941
+    },
+    {
+      "epoch": 0.21942,
+      "grad_norm": 1.3303495579649562,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 21942
+    },
+    {
+      "epoch": 0.21943,
+      "grad_norm": 1.2697916231419495,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 21943
+    },
+    {
+      "epoch": 0.21944,
+      "grad_norm": 1.1408443244630757,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 21944
+    },
+    {
+      "epoch": 0.21945,
+      "grad_norm": 1.2600971499646672,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 21945
+    },
+    {
+      "epoch": 0.21946,
+      "grad_norm": 1.3098347228103253,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 21946
+    },
+    {
+      "epoch": 0.21947,
+      "grad_norm": 1.4513435483654045,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 21947
+    },
+    {
+      "epoch": 0.21948,
+      "grad_norm": 1.200830825264696,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 21948
+    },
+    {
+      "epoch": 0.21949,
+      "grad_norm": 1.3509222517674822,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 21949
+    },
+    {
+      "epoch": 0.2195,
+      "grad_norm": 1.1395968903810056,
+      "learning_rate": 0.003,
+      "loss": 3.9766,
+      "step": 21950
+    },
+    {
+      "epoch": 0.21951,
+      "grad_norm": 1.5728761200389048,
+      "learning_rate": 0.003,
+      "loss": 3.9877,
+      "step": 21951
+    },
+    {
+      "epoch": 0.21952,
+      "grad_norm": 1.1363787087207766,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 21952
+    },
+    {
+      "epoch": 0.21953,
+      "grad_norm": 1.3577314199272705,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 21953
+    },
+    {
+      "epoch": 0.21954,
+      "grad_norm": 1.2500914517142754,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 21954
+    },
+    {
+      "epoch": 0.21955,
+      "grad_norm": 1.4603544818831204,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 21955
+    },
+    {
+      "epoch": 0.21956,
+      "grad_norm": 1.1745907106598832,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 21956
+    },
+    {
+      "epoch": 0.21957,
+      "grad_norm": 1.186160508033941,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 21957
+    },
+    {
+      "epoch": 0.21958,
+      "grad_norm": 1.2368320513108695,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 21958
+    },
+    {
+      "epoch": 0.21959,
+      "grad_norm": 1.4236076051226265,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 21959
+    },
+    {
+      "epoch": 0.2196,
+      "grad_norm": 1.1865289567557014,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 21960
+    },
+    {
+      "epoch": 0.21961,
+      "grad_norm": 1.2546481777956995,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 21961
+    },
+    {
+      "epoch": 0.21962,
+      "grad_norm": 1.345252889425688,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 21962
+    },
+    {
+      "epoch": 0.21963,
+      "grad_norm": 1.3627518014062117,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 21963
+    },
+    {
+      "epoch": 0.21964,
+      "grad_norm": 1.1712241091592825,
+      "learning_rate": 0.003,
+      "loss": 3.9769,
+      "step": 21964
+    },
+    {
+      "epoch": 0.21965,
+      "grad_norm": 1.513457301552111,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 21965
+    },
+    {
+      "epoch": 0.21966,
+      "grad_norm": 1.163463755306292,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 21966
+    },
+    {
+      "epoch": 0.21967,
+      "grad_norm": 1.2003450971786878,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 21967
+    },
+    {
+      "epoch": 0.21968,
+      "grad_norm": 1.1467509574049393,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 21968
+    },
+    {
+      "epoch": 0.21969,
+      "grad_norm": 1.379864497984008,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 21969
+    },
+    {
+      "epoch": 0.2197,
+      "grad_norm": 1.2584318634962848,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 21970
+    },
+    {
+      "epoch": 0.21971,
+      "grad_norm": 1.3972654983741013,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 21971
+    },
+    {
+      "epoch": 0.21972,
+      "grad_norm": 1.4502015031139905,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 21972
+    },
+    {
+      "epoch": 0.21973,
+      "grad_norm": 1.138620910620182,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 21973
+    },
+    {
+      "epoch": 0.21974,
+      "grad_norm": 1.4293096222523891,
+      "learning_rate": 0.003,
+      "loss": 4.0457,
+      "step": 21974
+    },
+    {
+      "epoch": 0.21975,
+      "grad_norm": 1.3330163131755468,
+      "learning_rate": 0.003,
+      "loss": 3.9786,
+      "step": 21975
+    },
+    {
+      "epoch": 0.21976,
+      "grad_norm": 1.4237210006242238,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 21976
+    },
+    {
+      "epoch": 0.21977,
+      "grad_norm": 1.495596782642265,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 21977
+    },
+    {
+      "epoch": 0.21978,
+      "grad_norm": 1.1220260672495146,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 21978
+    },
+    {
+      "epoch": 0.21979,
+      "grad_norm": 1.2442392075787911,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 21979
+    },
+    {
+      "epoch": 0.2198,
+      "grad_norm": 1.3444246511124354,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 21980
+    },
+    {
+      "epoch": 0.21981,
+      "grad_norm": 1.1452307342541062,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 21981
+    },
+    {
+      "epoch": 0.21982,
+      "grad_norm": 1.3242455461598135,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 21982
+    },
+    {
+      "epoch": 0.21983,
+      "grad_norm": 1.369131315266482,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 21983
+    },
+    {
+      "epoch": 0.21984,
+      "grad_norm": 1.2316747685982936,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 21984
+    },
+    {
+      "epoch": 0.21985,
+      "grad_norm": 1.1951326751468154,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 21985
+    },
+    {
+      "epoch": 0.21986,
+      "grad_norm": 1.37377002846383,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 21986
+    },
+    {
+      "epoch": 0.21987,
+      "grad_norm": 1.0896226162143197,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 21987
+    },
+    {
+      "epoch": 0.21988,
+      "grad_norm": 1.2639202795280942,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 21988
+    },
+    {
+      "epoch": 0.21989,
+      "grad_norm": 1.282674369063659,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 21989
+    },
+    {
+      "epoch": 0.2199,
+      "grad_norm": 1.2875758150173855,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 21990
+    },
+    {
+      "epoch": 0.21991,
+      "grad_norm": 1.385651012209087,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 21991
+    },
+    {
+      "epoch": 0.21992,
+      "grad_norm": 1.347858184909434,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 21992
+    },
+    {
+      "epoch": 0.21993,
+      "grad_norm": 1.3305111756630719,
+      "learning_rate": 0.003,
+      "loss": 3.9827,
+      "step": 21993
+    },
+    {
+      "epoch": 0.21994,
+      "grad_norm": 1.3283474624992067,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 21994
+    },
+    {
+      "epoch": 0.21995,
+      "grad_norm": 1.3826047219227724,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 21995
+    },
+    {
+      "epoch": 0.21996,
+      "grad_norm": 1.0951952990769498,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 21996
+    },
+    {
+      "epoch": 0.21997,
+      "grad_norm": 1.507892777333911,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 21997
+    },
+    {
+      "epoch": 0.21998,
+      "grad_norm": 1.0354754259079926,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 21998
+    },
+    {
+      "epoch": 0.21999,
+      "grad_norm": 1.5380320213563503,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 21999
+    },
+    {
+      "epoch": 0.22,
+      "grad_norm": 1.0189092873836487,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 22000
+    },
+    {
+      "epoch": 0.22001,
+      "grad_norm": 1.428867686943894,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 22001
+    },
+    {
+      "epoch": 0.22002,
+      "grad_norm": 1.2319500366647884,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 22002
+    },
+    {
+      "epoch": 0.22003,
+      "grad_norm": 1.2633050943212367,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 22003
+    },
+    {
+      "epoch": 0.22004,
+      "grad_norm": 1.2948771278466273,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 22004
+    },
+    {
+      "epoch": 0.22005,
+      "grad_norm": 1.1374450186578489,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 22005
+    },
+    {
+      "epoch": 0.22006,
+      "grad_norm": 1.5237268382103961,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 22006
+    },
+    {
+      "epoch": 0.22007,
+      "grad_norm": 1.082636516090062,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 22007
+    },
+    {
+      "epoch": 0.22008,
+      "grad_norm": 1.5706525964547133,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 22008
+    },
+    {
+      "epoch": 0.22009,
+      "grad_norm": 1.0855090068923505,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 22009
+    },
+    {
+      "epoch": 0.2201,
+      "grad_norm": 1.35454420551543,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 22010
+    },
+    {
+      "epoch": 0.22011,
+      "grad_norm": 1.3648919229378953,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 22011
+    },
+    {
+      "epoch": 0.22012,
+      "grad_norm": 1.154120608239134,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 22012
+    },
+    {
+      "epoch": 0.22013,
+      "grad_norm": 1.2865616699364808,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 22013
+    },
+    {
+      "epoch": 0.22014,
+      "grad_norm": 1.1980781058670849,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 22014
+    },
+    {
+      "epoch": 0.22015,
+      "grad_norm": 1.41767142889125,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 22015
+    },
+    {
+      "epoch": 0.22016,
+      "grad_norm": 1.1131704003436644,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 22016
+    },
+    {
+      "epoch": 0.22017,
+      "grad_norm": 1.6066146510173955,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 22017
+    },
+    {
+      "epoch": 0.22018,
+      "grad_norm": 1.004241072782212,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 22018
+    },
+    {
+      "epoch": 0.22019,
+      "grad_norm": 1.5176738872804638,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 22019
+    },
+    {
+      "epoch": 0.2202,
+      "grad_norm": 1.0081686885092411,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 22020
+    },
+    {
+      "epoch": 0.22021,
+      "grad_norm": 1.829997962581874,
+      "learning_rate": 0.003,
+      "loss": 4.056,
+      "step": 22021
+    },
+    {
+      "epoch": 0.22022,
+      "grad_norm": 1.0599632890880404,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 22022
+    },
+    {
+      "epoch": 0.22023,
+      "grad_norm": 1.4429083557713775,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 22023
+    },
+    {
+      "epoch": 0.22024,
+      "grad_norm": 1.2221930211758043,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 22024
+    },
+    {
+      "epoch": 0.22025,
+      "grad_norm": 1.4593121814203809,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 22025
+    },
+    {
+      "epoch": 0.22026,
+      "grad_norm": 1.2221895672813916,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 22026
+    },
+    {
+      "epoch": 0.22027,
+      "grad_norm": 1.344793569849184,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 22027
+    },
+    {
+      "epoch": 0.22028,
+      "grad_norm": 1.1483371112571372,
+      "learning_rate": 0.003,
+      "loss": 4.0276,
+      "step": 22028
+    },
+    {
+      "epoch": 0.22029,
+      "grad_norm": 1.2909013130117,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 22029
+    },
+    {
+      "epoch": 0.2203,
+      "grad_norm": 1.2381653001564616,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 22030
+    },
+    {
+      "epoch": 0.22031,
+      "grad_norm": 1.1142173992589275,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 22031
+    },
+    {
+      "epoch": 0.22032,
+      "grad_norm": 1.2770716216052906,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 22032
+    },
+    {
+      "epoch": 0.22033,
+      "grad_norm": 1.209833031762697,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 22033
+    },
+    {
+      "epoch": 0.22034,
+      "grad_norm": 1.5022570969983324,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 22034
+    },
+    {
+      "epoch": 0.22035,
+      "grad_norm": 1.1249020633312756,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 22035
+    },
+    {
+      "epoch": 0.22036,
+      "grad_norm": 1.4148394651843306,
+      "learning_rate": 0.003,
+      "loss": 4.0318,
+      "step": 22036
+    },
+    {
+      "epoch": 0.22037,
+      "grad_norm": 1.2001685633940893,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 22037
+    },
+    {
+      "epoch": 0.22038,
+      "grad_norm": 1.3269535278966025,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 22038
+    },
+    {
+      "epoch": 0.22039,
+      "grad_norm": 1.326508971574058,
+      "learning_rate": 0.003,
+      "loss": 4.0415,
+      "step": 22039
+    },
+    {
+      "epoch": 0.2204,
+      "grad_norm": 1.1317059274603851,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 22040
+    },
+    {
+      "epoch": 0.22041,
+      "grad_norm": 1.3356944049515387,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 22041
+    },
+    {
+      "epoch": 0.22042,
+      "grad_norm": 1.2500601555558384,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 22042
+    },
+    {
+      "epoch": 0.22043,
+      "grad_norm": 1.4474729734872778,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 22043
+    },
+    {
+      "epoch": 0.22044,
+      "grad_norm": 1.1308063346974266,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 22044
+    },
+    {
+      "epoch": 0.22045,
+      "grad_norm": 1.179501885486384,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 22045
+    },
+    {
+      "epoch": 0.22046,
+      "grad_norm": 1.4908231194552612,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 22046
+    },
+    {
+      "epoch": 0.22047,
+      "grad_norm": 1.2593301287164498,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 22047
+    },
+    {
+      "epoch": 0.22048,
+      "grad_norm": 1.1785909742470038,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 22048
+    },
+    {
+      "epoch": 0.22049,
+      "grad_norm": 1.533697748210594,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 22049
+    },
+    {
+      "epoch": 0.2205,
+      "grad_norm": 1.1438233345486835,
+      "learning_rate": 0.003,
+      "loss": 3.9774,
+      "step": 22050
+    },
+    {
+      "epoch": 0.22051,
+      "grad_norm": 1.7867501145052462,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 22051
+    },
+    {
+      "epoch": 0.22052,
+      "grad_norm": 1.1073476691169464,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 22052
+    },
+    {
+      "epoch": 0.22053,
+      "grad_norm": 1.4405470229714454,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 22053
+    },
+    {
+      "epoch": 0.22054,
+      "grad_norm": 1.2914641582535333,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 22054
+    },
+    {
+      "epoch": 0.22055,
+      "grad_norm": 1.253860833487621,
+      "learning_rate": 0.003,
+      "loss": 3.9725,
+      "step": 22055
+    },
+    {
+      "epoch": 0.22056,
+      "grad_norm": 1.0641652904146015,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 22056
+    },
+    {
+      "epoch": 0.22057,
+      "grad_norm": 1.4921300066638792,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 22057
+    },
+    {
+      "epoch": 0.22058,
+      "grad_norm": 1.3939940781227167,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 22058
+    },
+    {
+      "epoch": 0.22059,
+      "grad_norm": 1.2571460522275353,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 22059
+    },
+    {
+      "epoch": 0.2206,
+      "grad_norm": 1.4302707339553673,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 22060
+    },
+    {
+      "epoch": 0.22061,
+      "grad_norm": 1.2013298047046455,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 22061
+    },
+    {
+      "epoch": 0.22062,
+      "grad_norm": 1.2486095972091802,
+      "learning_rate": 0.003,
+      "loss": 3.9806,
+      "step": 22062
+    },
+    {
+      "epoch": 0.22063,
+      "grad_norm": 1.1879590683349075,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 22063
+    },
+    {
+      "epoch": 0.22064,
+      "grad_norm": 1.2464384303792504,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 22064
+    },
+    {
+      "epoch": 0.22065,
+      "grad_norm": 1.197677070742028,
+      "learning_rate": 0.003,
+      "loss": 3.9662,
+      "step": 22065
+    },
+    {
+      "epoch": 0.22066,
+      "grad_norm": 1.4446443900954185,
+      "learning_rate": 0.003,
+      "loss": 3.9754,
+      "step": 22066
+    },
+    {
+      "epoch": 0.22067,
+      "grad_norm": 1.1333958047741617,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 22067
+    },
+    {
+      "epoch": 0.22068,
+      "grad_norm": 1.4235986061193857,
+      "learning_rate": 0.003,
+      "loss": 3.982,
+      "step": 22068
+    },
+    {
+      "epoch": 0.22069,
+      "grad_norm": 1.2465562007016322,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 22069
+    },
+    {
+      "epoch": 0.2207,
+      "grad_norm": 1.362587303234457,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 22070
+    },
+    {
+      "epoch": 0.22071,
+      "grad_norm": 1.155060477336059,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 22071
+    },
+    {
+      "epoch": 0.22072,
+      "grad_norm": 1.1970466312434473,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 22072
+    },
+    {
+      "epoch": 0.22073,
+      "grad_norm": 1.3725158353516962,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 22073
+    },
+    {
+      "epoch": 0.22074,
+      "grad_norm": 1.1690013901086118,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 22074
+    },
+    {
+      "epoch": 0.22075,
+      "grad_norm": 1.3394892608775293,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 22075
+    },
+    {
+      "epoch": 0.22076,
+      "grad_norm": 1.091080000041114,
+      "learning_rate": 0.003,
+      "loss": 4.0363,
+      "step": 22076
+    },
+    {
+      "epoch": 0.22077,
+      "grad_norm": 1.6062558864733216,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 22077
+    },
+    {
+      "epoch": 0.22078,
+      "grad_norm": 0.9616858561917966,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 22078
+    },
+    {
+      "epoch": 0.22079,
+      "grad_norm": 1.4179928361291607,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 22079
+    },
+    {
+      "epoch": 0.2208,
+      "grad_norm": 1.1139799121889618,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 22080
+    },
+    {
+      "epoch": 0.22081,
+      "grad_norm": 1.6243199534585553,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 22081
+    },
+    {
+      "epoch": 0.22082,
+      "grad_norm": 0.9873636842267688,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 22082
+    },
+    {
+      "epoch": 0.22083,
+      "grad_norm": 1.56150130901402,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 22083
+    },
+    {
+      "epoch": 0.22084,
+      "grad_norm": 1.0954644992654325,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 22084
+    },
+    {
+      "epoch": 0.22085,
+      "grad_norm": 1.1524334734338175,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 22085
+    },
+    {
+      "epoch": 0.22086,
+      "grad_norm": 1.332106863321241,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 22086
+    },
+    {
+      "epoch": 0.22087,
+      "grad_norm": 1.1886022249334833,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 22087
+    },
+    {
+      "epoch": 0.22088,
+      "grad_norm": 1.4093190078874978,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 22088
+    },
+    {
+      "epoch": 0.22089,
+      "grad_norm": 1.0887333937037862,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 22089
+    },
+    {
+      "epoch": 0.2209,
+      "grad_norm": 1.2384187505461124,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 22090
+    },
+    {
+      "epoch": 0.22091,
+      "grad_norm": 1.447501775582183,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 22091
+    },
+    {
+      "epoch": 0.22092,
+      "grad_norm": 1.3666184100352485,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 22092
+    },
+    {
+      "epoch": 0.22093,
+      "grad_norm": 1.2118748400837311,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 22093
+    },
+    {
+      "epoch": 0.22094,
+      "grad_norm": 1.40396169926044,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 22094
+    },
+    {
+      "epoch": 0.22095,
+      "grad_norm": 1.2275023324229626,
+      "learning_rate": 0.003,
+      "loss": 3.9767,
+      "step": 22095
+    },
+    {
+      "epoch": 0.22096,
+      "grad_norm": 1.2382014386455995,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 22096
+    },
+    {
+      "epoch": 0.22097,
+      "grad_norm": 1.177756747160481,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 22097
+    },
+    {
+      "epoch": 0.22098,
+      "grad_norm": 1.3808222254990776,
+      "learning_rate": 0.003,
+      "loss": 3.9827,
+      "step": 22098
+    },
+    {
+      "epoch": 0.22099,
+      "grad_norm": 1.2007472942782924,
+      "learning_rate": 0.003,
+      "loss": 3.983,
+      "step": 22099
+    },
+    {
+      "epoch": 0.221,
+      "grad_norm": 1.3833069265795677,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 22100
+    },
+    {
+      "epoch": 0.22101,
+      "grad_norm": 1.323620324791774,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 22101
+    },
+    {
+      "epoch": 0.22102,
+      "grad_norm": 1.4934409918096088,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 22102
+    },
+    {
+      "epoch": 0.22103,
+      "grad_norm": 1.238598886669831,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 22103
+    },
+    {
+      "epoch": 0.22104,
+      "grad_norm": 1.3404517366558169,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 22104
+    },
+    {
+      "epoch": 0.22105,
+      "grad_norm": 1.0680523287474042,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 22105
+    },
+    {
+      "epoch": 0.22106,
+      "grad_norm": 1.2274922701685616,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 22106
+    },
+    {
+      "epoch": 0.22107,
+      "grad_norm": 1.4203153522250276,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 22107
+    },
+    {
+      "epoch": 0.22108,
+      "grad_norm": 1.1948585633357327,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 22108
+    },
+    {
+      "epoch": 0.22109,
+      "grad_norm": 1.4540983594912313,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 22109
+    },
+    {
+      "epoch": 0.2211,
+      "grad_norm": 1.1110510409353458,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 22110
+    },
+    {
+      "epoch": 0.22111,
+      "grad_norm": 1.4974533411866173,
+      "learning_rate": 0.003,
+      "loss": 3.9782,
+      "step": 22111
+    },
+    {
+      "epoch": 0.22112,
+      "grad_norm": 1.133514273375061,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 22112
+    },
+    {
+      "epoch": 0.22113,
+      "grad_norm": 1.3843747521593133,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 22113
+    },
+    {
+      "epoch": 0.22114,
+      "grad_norm": 1.1550535446032153,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 22114
+    },
+    {
+      "epoch": 0.22115,
+      "grad_norm": 1.424895412759572,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 22115
+    },
+    {
+      "epoch": 0.22116,
+      "grad_norm": 1.27303597107666,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 22116
+    },
+    {
+      "epoch": 0.22117,
+      "grad_norm": 1.3142444757748482,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 22117
+    },
+    {
+      "epoch": 0.22118,
+      "grad_norm": 1.3088368549729987,
+      "learning_rate": 0.003,
+      "loss": 4.0437,
+      "step": 22118
+    },
+    {
+      "epoch": 0.22119,
+      "grad_norm": 1.2523343040017494,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 22119
+    },
+    {
+      "epoch": 0.2212,
+      "grad_norm": 1.5514376340553981,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 22120
+    },
+    {
+      "epoch": 0.22121,
+      "grad_norm": 1.1015771541196764,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 22121
+    },
+    {
+      "epoch": 0.22122,
+      "grad_norm": 1.4885916555454042,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 22122
+    },
+    {
+      "epoch": 0.22123,
+      "grad_norm": 1.0027423773501605,
+      "learning_rate": 0.003,
+      "loss": 3.9789,
+      "step": 22123
+    },
+    {
+      "epoch": 0.22124,
+      "grad_norm": 1.5194912381275527,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 22124
+    },
+    {
+      "epoch": 0.22125,
+      "grad_norm": 1.1701169754673186,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 22125
+    },
+    {
+      "epoch": 0.22126,
+      "grad_norm": 1.5434868974489149,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 22126
+    },
+    {
+      "epoch": 0.22127,
+      "grad_norm": 1.1107156008816752,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 22127
+    },
+    {
+      "epoch": 0.22128,
+      "grad_norm": 1.5103690953821733,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 22128
+    },
+    {
+      "epoch": 0.22129,
+      "grad_norm": 1.1579426881387118,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 22129
+    },
+    {
+      "epoch": 0.2213,
+      "grad_norm": 1.2785602163525396,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 22130
+    },
+    {
+      "epoch": 0.22131,
+      "grad_norm": 1.0656242167117935,
+      "learning_rate": 0.003,
+      "loss": 3.983,
+      "step": 22131
+    },
+    {
+      "epoch": 0.22132,
+      "grad_norm": 1.2091553838694626,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 22132
+    },
+    {
+      "epoch": 0.22133,
+      "grad_norm": 1.3345682331749695,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 22133
+    },
+    {
+      "epoch": 0.22134,
+      "grad_norm": 1.1013115769098618,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 22134
+    },
+    {
+      "epoch": 0.22135,
+      "grad_norm": 1.3904152512651053,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 22135
+    },
+    {
+      "epoch": 0.22136,
+      "grad_norm": 1.1398734825583523,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 22136
+    },
+    {
+      "epoch": 0.22137,
+      "grad_norm": 1.4053187470187192,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 22137
+    },
+    {
+      "epoch": 0.22138,
+      "grad_norm": 1.1589072462895906,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 22138
+    },
+    {
+      "epoch": 0.22139,
+      "grad_norm": 1.2989181399788257,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 22139
+    },
+    {
+      "epoch": 0.2214,
+      "grad_norm": 1.355417899770067,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 22140
+    },
+    {
+      "epoch": 0.22141,
+      "grad_norm": 1.1364100260616194,
+      "learning_rate": 0.003,
+      "loss": 3.9785,
+      "step": 22141
+    },
+    {
+      "epoch": 0.22142,
+      "grad_norm": 1.3869143553853276,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 22142
+    },
+    {
+      "epoch": 0.22143,
+      "grad_norm": 1.1324885204446864,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 22143
+    },
+    {
+      "epoch": 0.22144,
+      "grad_norm": 1.4666090754290997,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 22144
+    },
+    {
+      "epoch": 0.22145,
+      "grad_norm": 1.1745155965311151,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 22145
+    },
+    {
+      "epoch": 0.22146,
+      "grad_norm": 1.1350638436165652,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 22146
+    },
+    {
+      "epoch": 0.22147,
+      "grad_norm": 1.318922272426884,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 22147
+    },
+    {
+      "epoch": 0.22148,
+      "grad_norm": 1.2489417760227715,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 22148
+    },
+    {
+      "epoch": 0.22149,
+      "grad_norm": 1.4964421564797827,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 22149
+    },
+    {
+      "epoch": 0.2215,
+      "grad_norm": 1.1549858738794154,
+      "learning_rate": 0.003,
+      "loss": 3.9718,
+      "step": 22150
+    },
+    {
+      "epoch": 0.22151,
+      "grad_norm": 1.275313692137715,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 22151
+    },
+    {
+      "epoch": 0.22152,
+      "grad_norm": 1.3239559956324538,
+      "learning_rate": 0.003,
+      "loss": 4.0438,
+      "step": 22152
+    },
+    {
+      "epoch": 0.22153,
+      "grad_norm": 1.2073036732004683,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 22153
+    },
+    {
+      "epoch": 0.22154,
+      "grad_norm": 1.2273812799478625,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 22154
+    },
+    {
+      "epoch": 0.22155,
+      "grad_norm": 1.3509851424095785,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 22155
+    },
+    {
+      "epoch": 0.22156,
+      "grad_norm": 1.2691096441678182,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 22156
+    },
+    {
+      "epoch": 0.22157,
+      "grad_norm": 1.4805730315050365,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 22157
+    },
+    {
+      "epoch": 0.22158,
+      "grad_norm": 1.4052486748891053,
+      "learning_rate": 0.003,
+      "loss": 4.0401,
+      "step": 22158
+    },
+    {
+      "epoch": 0.22159,
+      "grad_norm": 1.2545201023240475,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 22159
+    },
+    {
+      "epoch": 0.2216,
+      "grad_norm": 1.3631514115956413,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 22160
+    },
+    {
+      "epoch": 0.22161,
+      "grad_norm": 1.1575288632960248,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 22161
+    },
+    {
+      "epoch": 0.22162,
+      "grad_norm": 1.2100612790857146,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 22162
+    },
+    {
+      "epoch": 0.22163,
+      "grad_norm": 1.3298106541093695,
+      "learning_rate": 0.003,
+      "loss": 4.0309,
+      "step": 22163
+    },
+    {
+      "epoch": 0.22164,
+      "grad_norm": 1.1339756660653089,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 22164
+    },
+    {
+      "epoch": 0.22165,
+      "grad_norm": 1.4270428568478482,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 22165
+    },
+    {
+      "epoch": 0.22166,
+      "grad_norm": 1.073143861528856,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 22166
+    },
+    {
+      "epoch": 0.22167,
+      "grad_norm": 1.3751081610695806,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 22167
+    },
+    {
+      "epoch": 0.22168,
+      "grad_norm": 1.1859347957558986,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 22168
+    },
+    {
+      "epoch": 0.22169,
+      "grad_norm": 1.3726162253307514,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 22169
+    },
+    {
+      "epoch": 0.2217,
+      "grad_norm": 1.5072967067192462,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 22170
+    },
+    {
+      "epoch": 0.22171,
+      "grad_norm": 1.1945433874179399,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 22171
+    },
+    {
+      "epoch": 0.22172,
+      "grad_norm": 1.0478732761069476,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 22172
+    },
+    {
+      "epoch": 0.22173,
+      "grad_norm": 1.4847092338414318,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 22173
+    },
+    {
+      "epoch": 0.22174,
+      "grad_norm": 1.1248172998712007,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 22174
+    },
+    {
+      "epoch": 0.22175,
+      "grad_norm": 1.652201642290977,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 22175
+    },
+    {
+      "epoch": 0.22176,
+      "grad_norm": 1.0389131111360757,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 22176
+    },
+    {
+      "epoch": 0.22177,
+      "grad_norm": 1.2779270276685768,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 22177
+    },
+    {
+      "epoch": 0.22178,
+      "grad_norm": 1.4143137912893928,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 22178
+    },
+    {
+      "epoch": 0.22179,
+      "grad_norm": 1.374459395047651,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 22179
+    },
+    {
+      "epoch": 0.2218,
+      "grad_norm": 1.2594069525331673,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 22180
+    },
+    {
+      "epoch": 0.22181,
+      "grad_norm": 1.287818577368298,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 22181
+    },
+    {
+      "epoch": 0.22182,
+      "grad_norm": 1.2419472927094162,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 22182
+    },
+    {
+      "epoch": 0.22183,
+      "grad_norm": 1.2487621588581774,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 22183
+    },
+    {
+      "epoch": 0.22184,
+      "grad_norm": 1.3795333310186317,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 22184
+    },
+    {
+      "epoch": 0.22185,
+      "grad_norm": 1.578629733122645,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 22185
+    },
+    {
+      "epoch": 0.22186,
+      "grad_norm": 1.110870347041684,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 22186
+    },
+    {
+      "epoch": 0.22187,
+      "grad_norm": 1.4214308078457683,
+      "learning_rate": 0.003,
+      "loss": 3.9815,
+      "step": 22187
+    },
+    {
+      "epoch": 0.22188,
+      "grad_norm": 1.1233098661443772,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 22188
+    },
+    {
+      "epoch": 0.22189,
+      "grad_norm": 1.2497330290170423,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 22189
+    },
+    {
+      "epoch": 0.2219,
+      "grad_norm": 1.1539569032175996,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 22190
+    },
+    {
+      "epoch": 0.22191,
+      "grad_norm": 1.4485213308165041,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 22191
+    },
+    {
+      "epoch": 0.22192,
+      "grad_norm": 1.1531431252182807,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 22192
+    },
+    {
+      "epoch": 0.22193,
+      "grad_norm": 1.3712884446908908,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 22193
+    },
+    {
+      "epoch": 0.22194,
+      "grad_norm": 1.1557739441086288,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 22194
+    },
+    {
+      "epoch": 0.22195,
+      "grad_norm": 1.4298868193434655,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 22195
+    },
+    {
+      "epoch": 0.22196,
+      "grad_norm": 1.3003211410404316,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 22196
+    },
+    {
+      "epoch": 0.22197,
+      "grad_norm": 1.593557490358542,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 22197
+    },
+    {
+      "epoch": 0.22198,
+      "grad_norm": 1.1550824319803468,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 22198
+    },
+    {
+      "epoch": 0.22199,
+      "grad_norm": 1.0959497390147552,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 22199
+    },
+    {
+      "epoch": 0.222,
+      "grad_norm": 1.1257699617349437,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 22200
+    },
+    {
+      "epoch": 0.22201,
+      "grad_norm": 1.0927871614697378,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 22201
+    },
+    {
+      "epoch": 0.22202,
+      "grad_norm": 1.6016117903457965,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 22202
+    },
+    {
+      "epoch": 0.22203,
+      "grad_norm": 1.1460426780830848,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 22203
+    },
+    {
+      "epoch": 0.22204,
+      "grad_norm": 1.3424979262289414,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 22204
+    },
+    {
+      "epoch": 0.22205,
+      "grad_norm": 1.195203446186108,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 22205
+    },
+    {
+      "epoch": 0.22206,
+      "grad_norm": 1.2556505341453592,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 22206
+    },
+    {
+      "epoch": 0.22207,
+      "grad_norm": 1.0612035131157695,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 22207
+    },
+    {
+      "epoch": 0.22208,
+      "grad_norm": 1.213076018746376,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 22208
+    },
+    {
+      "epoch": 0.22209,
+      "grad_norm": 1.3373363902873439,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 22209
+    },
+    {
+      "epoch": 0.2221,
+      "grad_norm": 1.1622364416413262,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 22210
+    },
+    {
+      "epoch": 0.22211,
+      "grad_norm": 1.5373206855276882,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 22211
+    },
+    {
+      "epoch": 0.22212,
+      "grad_norm": 1.2398106435025416,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 22212
+    },
+    {
+      "epoch": 0.22213,
+      "grad_norm": 1.3992650010932621,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 22213
+    },
+    {
+      "epoch": 0.22214,
+      "grad_norm": 1.2319045721501314,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 22214
+    },
+    {
+      "epoch": 0.22215,
+      "grad_norm": 1.325902773789485,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 22215
+    },
+    {
+      "epoch": 0.22216,
+      "grad_norm": 1.1342506458176551,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 22216
+    },
+    {
+      "epoch": 0.22217,
+      "grad_norm": 1.2347743890569887,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 22217
+    },
+    {
+      "epoch": 0.22218,
+      "grad_norm": 1.330046530674968,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 22218
+    },
+    {
+      "epoch": 0.22219,
+      "grad_norm": 1.3530417945264546,
+      "learning_rate": 0.003,
+      "loss": 3.9845,
+      "step": 22219
+    },
+    {
+      "epoch": 0.2222,
+      "grad_norm": 1.1446028462232785,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 22220
+    },
+    {
+      "epoch": 0.22221,
+      "grad_norm": 1.3803948841967062,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 22221
+    },
+    {
+      "epoch": 0.22222,
+      "grad_norm": 1.1663442656127743,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 22222
+    },
+    {
+      "epoch": 0.22223,
+      "grad_norm": 1.2493702516543268,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 22223
+    },
+    {
+      "epoch": 0.22224,
+      "grad_norm": 1.3134853774869517,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 22224
+    },
+    {
+      "epoch": 0.22225,
+      "grad_norm": 1.2332610820250947,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 22225
+    },
+    {
+      "epoch": 0.22226,
+      "grad_norm": 1.3264490532651745,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 22226
+    },
+    {
+      "epoch": 0.22227,
+      "grad_norm": 1.1729989633232898,
+      "learning_rate": 0.003,
+      "loss": 3.9877,
+      "step": 22227
+    },
+    {
+      "epoch": 0.22228,
+      "grad_norm": 1.338147775407921,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 22228
+    },
+    {
+      "epoch": 0.22229,
+      "grad_norm": 1.1531622930302199,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 22229
+    },
+    {
+      "epoch": 0.2223,
+      "grad_norm": 1.4623724671251228,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 22230
+    },
+    {
+      "epoch": 0.22231,
+      "grad_norm": 1.2283445970627507,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 22231
+    },
+    {
+      "epoch": 0.22232,
+      "grad_norm": 1.4734893109741845,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 22232
+    },
+    {
+      "epoch": 0.22233,
+      "grad_norm": 1.2662444672806636,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 22233
+    },
+    {
+      "epoch": 0.22234,
+      "grad_norm": 1.1679597681427156,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 22234
+    },
+    {
+      "epoch": 0.22235,
+      "grad_norm": 1.274699632534093,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 22235
+    },
+    {
+      "epoch": 0.22236,
+      "grad_norm": 1.3195000288964178,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 22236
+    },
+    {
+      "epoch": 0.22237,
+      "grad_norm": 1.1231028264185339,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 22237
+    },
+    {
+      "epoch": 0.22238,
+      "grad_norm": 1.4515920323244593,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 22238
+    },
+    {
+      "epoch": 0.22239,
+      "grad_norm": 1.2740602000122034,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 22239
+    },
+    {
+      "epoch": 0.2224,
+      "grad_norm": 1.2561539032739748,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 22240
+    },
+    {
+      "epoch": 0.22241,
+      "grad_norm": 1.3632561545347632,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 22241
+    },
+    {
+      "epoch": 0.22242,
+      "grad_norm": 1.0622585673942055,
+      "learning_rate": 0.003,
+      "loss": 3.9808,
+      "step": 22242
+    },
+    {
+      "epoch": 0.22243,
+      "grad_norm": 1.3463769549220315,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 22243
+    },
+    {
+      "epoch": 0.22244,
+      "grad_norm": 1.2120183776452662,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 22244
+    },
+    {
+      "epoch": 0.22245,
+      "grad_norm": 1.5380436175849057,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 22245
+    },
+    {
+      "epoch": 0.22246,
+      "grad_norm": 1.1606007356774277,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 22246
+    },
+    {
+      "epoch": 0.22247,
+      "grad_norm": 1.3906118557372886,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 22247
+    },
+    {
+      "epoch": 0.22248,
+      "grad_norm": 1.2210053095989595,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 22248
+    },
+    {
+      "epoch": 0.22249,
+      "grad_norm": 1.283401948086982,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 22249
+    },
+    {
+      "epoch": 0.2225,
+      "grad_norm": 1.10567302298304,
+      "learning_rate": 0.003,
+      "loss": 3.9864,
+      "step": 22250
+    },
+    {
+      "epoch": 0.22251,
+      "grad_norm": 1.4972763891149112,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 22251
+    },
+    {
+      "epoch": 0.22252,
+      "grad_norm": 1.1978424416814946,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 22252
+    },
+    {
+      "epoch": 0.22253,
+      "grad_norm": 1.5737416436157172,
+      "learning_rate": 0.003,
+      "loss": 3.9783,
+      "step": 22253
+    },
+    {
+      "epoch": 0.22254,
+      "grad_norm": 0.8735047438876812,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 22254
+    },
+    {
+      "epoch": 0.22255,
+      "grad_norm": 1.529849713127279,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 22255
+    },
+    {
+      "epoch": 0.22256,
+      "grad_norm": 1.3027370119764354,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 22256
+    },
+    {
+      "epoch": 0.22257,
+      "grad_norm": 1.3163653505940183,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 22257
+    },
+    {
+      "epoch": 0.22258,
+      "grad_norm": 1.30996803756067,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 22258
+    },
+    {
+      "epoch": 0.22259,
+      "grad_norm": 1.616681898831908,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 22259
+    },
+    {
+      "epoch": 0.2226,
+      "grad_norm": 0.9947180714798239,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 22260
+    },
+    {
+      "epoch": 0.22261,
+      "grad_norm": 1.4796841295380732,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 22261
+    },
+    {
+      "epoch": 0.22262,
+      "grad_norm": 1.0750942073211576,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 22262
+    },
+    {
+      "epoch": 0.22263,
+      "grad_norm": 1.586556519301127,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 22263
+    },
+    {
+      "epoch": 0.22264,
+      "grad_norm": 1.1506249436189016,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 22264
+    },
+    {
+      "epoch": 0.22265,
+      "grad_norm": 1.2348456352201267,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 22265
+    },
+    {
+      "epoch": 0.22266,
+      "grad_norm": 1.2334313872843545,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 22266
+    },
+    {
+      "epoch": 0.22267,
+      "grad_norm": 1.1891279177421972,
+      "learning_rate": 0.003,
+      "loss": 3.972,
+      "step": 22267
+    },
+    {
+      "epoch": 0.22268,
+      "grad_norm": 1.3870979865809179,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 22268
+    },
+    {
+      "epoch": 0.22269,
+      "grad_norm": 1.0748794010643383,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 22269
+    },
+    {
+      "epoch": 0.2227,
+      "grad_norm": 1.440572402600489,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 22270
+    },
+    {
+      "epoch": 0.22271,
+      "grad_norm": 1.1468997498396223,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 22271
+    },
+    {
+      "epoch": 0.22272,
+      "grad_norm": 1.4709870504935312,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 22272
+    },
+    {
+      "epoch": 0.22273,
+      "grad_norm": 1.3092269570709973,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 22273
+    },
+    {
+      "epoch": 0.22274,
+      "grad_norm": 1.3434577829800087,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 22274
+    },
+    {
+      "epoch": 0.22275,
+      "grad_norm": 1.0955397652147572,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 22275
+    },
+    {
+      "epoch": 0.22276,
+      "grad_norm": 1.4052464611643605,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 22276
+    },
+    {
+      "epoch": 0.22277,
+      "grad_norm": 1.1423726995358114,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 22277
+    },
+    {
+      "epoch": 0.22278,
+      "grad_norm": 1.4822558601113118,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 22278
+    },
+    {
+      "epoch": 0.22279,
+      "grad_norm": 1.088760007212847,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 22279
+    },
+    {
+      "epoch": 0.2228,
+      "grad_norm": 1.4406821704441328,
+      "learning_rate": 0.003,
+      "loss": 3.9931,
+      "step": 22280
+    },
+    {
+      "epoch": 0.22281,
+      "grad_norm": 1.220925352986136,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 22281
+    },
+    {
+      "epoch": 0.22282,
+      "grad_norm": 1.340358580542462,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 22282
+    },
+    {
+      "epoch": 0.22283,
+      "grad_norm": 1.2072620405532462,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 22283
+    },
+    {
+      "epoch": 0.22284,
+      "grad_norm": 1.1333545422185627,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 22284
+    },
+    {
+      "epoch": 0.22285,
+      "grad_norm": 1.241321508071821,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 22285
+    },
+    {
+      "epoch": 0.22286,
+      "grad_norm": 1.3779494374347359,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 22286
+    },
+    {
+      "epoch": 0.22287,
+      "grad_norm": 1.298214274010136,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 22287
+    },
+    {
+      "epoch": 0.22288,
+      "grad_norm": 1.3093745367987752,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 22288
+    },
+    {
+      "epoch": 0.22289,
+      "grad_norm": 1.0624855229087422,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 22289
+    },
+    {
+      "epoch": 0.2229,
+      "grad_norm": 1.3648232714564688,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 22290
+    },
+    {
+      "epoch": 0.22291,
+      "grad_norm": 1.119801631630299,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 22291
+    },
+    {
+      "epoch": 0.22292,
+      "grad_norm": 1.4076053904854904,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 22292
+    },
+    {
+      "epoch": 0.22293,
+      "grad_norm": 1.2825525548647692,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 22293
+    },
+    {
+      "epoch": 0.22294,
+      "grad_norm": 1.5887204915820883,
+      "learning_rate": 0.003,
+      "loss": 3.9859,
+      "step": 22294
+    },
+    {
+      "epoch": 0.22295,
+      "grad_norm": 1.2934685119144123,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 22295
+    },
+    {
+      "epoch": 0.22296,
+      "grad_norm": 1.3649114635602926,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 22296
+    },
+    {
+      "epoch": 0.22297,
+      "grad_norm": 1.285459795879992,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 22297
+    },
+    {
+      "epoch": 0.22298,
+      "grad_norm": 1.319993251974831,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 22298
+    },
+    {
+      "epoch": 0.22299,
+      "grad_norm": 1.1812319133921414,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 22299
+    },
+    {
+      "epoch": 0.223,
+      "grad_norm": 1.3572429238647146,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 22300
+    },
+    {
+      "epoch": 0.22301,
+      "grad_norm": 1.2663173568884765,
+      "learning_rate": 0.003,
+      "loss": 3.9761,
+      "step": 22301
+    },
+    {
+      "epoch": 0.22302,
+      "grad_norm": 1.3331389787769912,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 22302
+    },
+    {
+      "epoch": 0.22303,
+      "grad_norm": 1.1674424044244993,
+      "learning_rate": 0.003,
+      "loss": 3.9441,
+      "step": 22303
+    },
+    {
+      "epoch": 0.22304,
+      "grad_norm": 1.7269127202386154,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 22304
+    },
+    {
+      "epoch": 0.22305,
+      "grad_norm": 1.2732554162031873,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 22305
+    },
+    {
+      "epoch": 0.22306,
+      "grad_norm": 1.3568657357816485,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 22306
+    },
+    {
+      "epoch": 0.22307,
+      "grad_norm": 1.2186543271436692,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 22307
+    },
+    {
+      "epoch": 0.22308,
+      "grad_norm": 1.0965927729749942,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 22308
+    },
+    {
+      "epoch": 0.22309,
+      "grad_norm": 1.3777894643102881,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 22309
+    },
+    {
+      "epoch": 0.2231,
+      "grad_norm": 1.1828291628119303,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 22310
+    },
+    {
+      "epoch": 0.22311,
+      "grad_norm": 1.4735690437405191,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 22311
+    },
+    {
+      "epoch": 0.22312,
+      "grad_norm": 1.127770982340548,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 22312
+    },
+    {
+      "epoch": 0.22313,
+      "grad_norm": 1.4997024118649642,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 22313
+    },
+    {
+      "epoch": 0.22314,
+      "grad_norm": 0.972881989264549,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 22314
+    },
+    {
+      "epoch": 0.22315,
+      "grad_norm": 1.3684271183787817,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 22315
+    },
+    {
+      "epoch": 0.22316,
+      "grad_norm": 1.283378227737481,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 22316
+    },
+    {
+      "epoch": 0.22317,
+      "grad_norm": 1.1803345183702496,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 22317
+    },
+    {
+      "epoch": 0.22318,
+      "grad_norm": 1.4422695624314568,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 22318
+    },
+    {
+      "epoch": 0.22319,
+      "grad_norm": 1.2067544669182493,
+      "learning_rate": 0.003,
+      "loss": 4.0245,
+      "step": 22319
+    },
+    {
+      "epoch": 0.2232,
+      "grad_norm": 1.2612677804987527,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 22320
+    },
+    {
+      "epoch": 0.22321,
+      "grad_norm": 1.1638777443335961,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 22321
+    },
+    {
+      "epoch": 0.22322,
+      "grad_norm": 1.2699783786888434,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 22322
+    },
+    {
+      "epoch": 0.22323,
+      "grad_norm": 1.211783876700593,
+      "learning_rate": 0.003,
+      "loss": 4.024,
+      "step": 22323
+    },
+    {
+      "epoch": 0.22324,
+      "grad_norm": 1.5221454994686965,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 22324
+    },
+    {
+      "epoch": 0.22325,
+      "grad_norm": 1.3427985263350946,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 22325
+    },
+    {
+      "epoch": 0.22326,
+      "grad_norm": 1.509952886384745,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 22326
+    },
+    {
+      "epoch": 0.22327,
+      "grad_norm": 1.1193655253743768,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 22327
+    },
+    {
+      "epoch": 0.22328,
+      "grad_norm": 1.4273329615126262,
+      "learning_rate": 0.003,
+      "loss": 4.0286,
+      "step": 22328
+    },
+    {
+      "epoch": 0.22329,
+      "grad_norm": 1.1284846688376067,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 22329
+    },
+    {
+      "epoch": 0.2233,
+      "grad_norm": 1.453615956109936,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 22330
+    },
+    {
+      "epoch": 0.22331,
+      "grad_norm": 1.1605671656961853,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 22331
+    },
+    {
+      "epoch": 0.22332,
+      "grad_norm": 1.1873711107173008,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 22332
+    },
+    {
+      "epoch": 0.22333,
+      "grad_norm": 1.2429509028418222,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 22333
+    },
+    {
+      "epoch": 0.22334,
+      "grad_norm": 1.4871785444884562,
+      "learning_rate": 0.003,
+      "loss": 4.0521,
+      "step": 22334
+    },
+    {
+      "epoch": 0.22335,
+      "grad_norm": 1.3067085077013383,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 22335
+    },
+    {
+      "epoch": 0.22336,
+      "grad_norm": 1.5242278694179714,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 22336
+    },
+    {
+      "epoch": 0.22337,
+      "grad_norm": 1.3150749039084444,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 22337
+    },
+    {
+      "epoch": 0.22338,
+      "grad_norm": 1.1973381240549057,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 22338
+    },
+    {
+      "epoch": 0.22339,
+      "grad_norm": 1.2772138858327446,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 22339
+    },
+    {
+      "epoch": 0.2234,
+      "grad_norm": 1.14997173594439,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 22340
+    },
+    {
+      "epoch": 0.22341,
+      "grad_norm": 1.4496514063420634,
+      "learning_rate": 0.003,
+      "loss": 3.9822,
+      "step": 22341
+    },
+    {
+      "epoch": 0.22342,
+      "grad_norm": 1.1779142704982855,
+      "learning_rate": 0.003,
+      "loss": 3.9807,
+      "step": 22342
+    },
+    {
+      "epoch": 0.22343,
+      "grad_norm": 1.6029258617671656,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 22343
+    },
+    {
+      "epoch": 0.22344,
+      "grad_norm": 1.1727094745423574,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 22344
+    },
+    {
+      "epoch": 0.22345,
+      "grad_norm": 1.3331263584096626,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 22345
+    },
+    {
+      "epoch": 0.22346,
+      "grad_norm": 1.2607490835163813,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 22346
+    },
+    {
+      "epoch": 0.22347,
+      "grad_norm": 1.3998270977822362,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 22347
+    },
+    {
+      "epoch": 0.22348,
+      "grad_norm": 1.1674370136552168,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 22348
+    },
+    {
+      "epoch": 0.22349,
+      "grad_norm": 1.3703875678328927,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 22349
+    },
+    {
+      "epoch": 0.2235,
+      "grad_norm": 1.3270542712606392,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 22350
+    },
+    {
+      "epoch": 0.22351,
+      "grad_norm": 1.2457363989909527,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 22351
+    },
+    {
+      "epoch": 0.22352,
+      "grad_norm": 1.4189238415769445,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 22352
+    },
+    {
+      "epoch": 0.22353,
+      "grad_norm": 0.9930467772629256,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 22353
+    },
+    {
+      "epoch": 0.22354,
+      "grad_norm": 1.5164128735639641,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 22354
+    },
+    {
+      "epoch": 0.22355,
+      "grad_norm": 1.148513076136743,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 22355
+    },
+    {
+      "epoch": 0.22356,
+      "grad_norm": 1.2905881170543094,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 22356
+    },
+    {
+      "epoch": 0.22357,
+      "grad_norm": 1.3536614009466608,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 22357
+    },
+    {
+      "epoch": 0.22358,
+      "grad_norm": 1.3309981605000605,
+      "learning_rate": 0.003,
+      "loss": 4.0392,
+      "step": 22358
+    },
+    {
+      "epoch": 0.22359,
+      "grad_norm": 1.1900853704578604,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 22359
+    },
+    {
+      "epoch": 0.2236,
+      "grad_norm": 1.2681050536547587,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 22360
+    },
+    {
+      "epoch": 0.22361,
+      "grad_norm": 1.1684778376524112,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 22361
+    },
+    {
+      "epoch": 0.22362,
+      "grad_norm": 1.5881862448893047,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 22362
+    },
+    {
+      "epoch": 0.22363,
+      "grad_norm": 0.9402399098757971,
+      "learning_rate": 0.003,
+      "loss": 3.9773,
+      "step": 22363
+    },
+    {
+      "epoch": 0.22364,
+      "grad_norm": 1.3168742482590914,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 22364
+    },
+    {
+      "epoch": 0.22365,
+      "grad_norm": 1.2102800062161798,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 22365
+    },
+    {
+      "epoch": 0.22366,
+      "grad_norm": 1.3506961598974063,
+      "learning_rate": 0.003,
+      "loss": 3.9734,
+      "step": 22366
+    },
+    {
+      "epoch": 0.22367,
+      "grad_norm": 1.44743130087681,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 22367
+    },
+    {
+      "epoch": 0.22368,
+      "grad_norm": 1.087698624872361,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 22368
+    },
+    {
+      "epoch": 0.22369,
+      "grad_norm": 1.27151368291548,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 22369
+    },
+    {
+      "epoch": 0.2237,
+      "grad_norm": 1.1995097632350293,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 22370
+    },
+    {
+      "epoch": 0.22371,
+      "grad_norm": 1.3117837966139207,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 22371
+    },
+    {
+      "epoch": 0.22372,
+      "grad_norm": 1.4879884637276606,
+      "learning_rate": 0.003,
+      "loss": 4.0331,
+      "step": 22372
+    },
+    {
+      "epoch": 0.22373,
+      "grad_norm": 1.2417573212437794,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 22373
+    },
+    {
+      "epoch": 0.22374,
+      "grad_norm": 1.3976894470335894,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 22374
+    },
+    {
+      "epoch": 0.22375,
+      "grad_norm": 1.2772621018956982,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 22375
+    },
+    {
+      "epoch": 0.22376,
+      "grad_norm": 1.325703715384148,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 22376
+    },
+    {
+      "epoch": 0.22377,
+      "grad_norm": 1.159986673079892,
+      "learning_rate": 0.003,
+      "loss": 3.9556,
+      "step": 22377
+    },
+    {
+      "epoch": 0.22378,
+      "grad_norm": 1.311426256275083,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 22378
+    },
+    {
+      "epoch": 0.22379,
+      "grad_norm": 1.047558449924243,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 22379
+    },
+    {
+      "epoch": 0.2238,
+      "grad_norm": 1.438045589395385,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 22380
+    },
+    {
+      "epoch": 0.22381,
+      "grad_norm": 1.0451849508313544,
+      "learning_rate": 0.003,
+      "loss": 3.9722,
+      "step": 22381
+    },
+    {
+      "epoch": 0.22382,
+      "grad_norm": 1.558069626929958,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 22382
+    },
+    {
+      "epoch": 0.22383,
+      "grad_norm": 1.2074830279517346,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 22383
+    },
+    {
+      "epoch": 0.22384,
+      "grad_norm": 1.3628786377290536,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 22384
+    },
+    {
+      "epoch": 0.22385,
+      "grad_norm": 1.2197437526886952,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 22385
+    },
+    {
+      "epoch": 0.22386,
+      "grad_norm": 1.38383836952328,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 22386
+    },
+    {
+      "epoch": 0.22387,
+      "grad_norm": 1.1782710430119665,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 22387
+    },
+    {
+      "epoch": 0.22388,
+      "grad_norm": 1.569609127597049,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 22388
+    },
+    {
+      "epoch": 0.22389,
+      "grad_norm": 1.112750420167682,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 22389
+    },
+    {
+      "epoch": 0.2239,
+      "grad_norm": 1.3490860033883258,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 22390
+    },
+    {
+      "epoch": 0.22391,
+      "grad_norm": 1.1546774040918815,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 22391
+    },
+    {
+      "epoch": 0.22392,
+      "grad_norm": 1.3166079006981373,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 22392
+    },
+    {
+      "epoch": 0.22393,
+      "grad_norm": 1.3454088910519977,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 22393
+    },
+    {
+      "epoch": 0.22394,
+      "grad_norm": 1.2660747456334196,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 22394
+    },
+    {
+      "epoch": 0.22395,
+      "grad_norm": 1.2355287239409345,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 22395
+    },
+    {
+      "epoch": 0.22396,
+      "grad_norm": 1.253999614163376,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 22396
+    },
+    {
+      "epoch": 0.22397,
+      "grad_norm": 1.3535283958350979,
+      "learning_rate": 0.003,
+      "loss": 3.9808,
+      "step": 22397
+    },
+    {
+      "epoch": 0.22398,
+      "grad_norm": 1.061519616861652,
+      "learning_rate": 0.003,
+      "loss": 3.9734,
+      "step": 22398
+    },
+    {
+      "epoch": 0.22399,
+      "grad_norm": 1.345174186246392,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 22399
+    },
+    {
+      "epoch": 0.224,
+      "grad_norm": 1.064710677999615,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 22400
+    },
+    {
+      "epoch": 0.22401,
+      "grad_norm": 1.2410371256345285,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 22401
+    },
+    {
+      "epoch": 0.22402,
+      "grad_norm": 1.4001250561480585,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 22402
+    },
+    {
+      "epoch": 0.22403,
+      "grad_norm": 1.3207333881116234,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 22403
+    },
+    {
+      "epoch": 0.22404,
+      "grad_norm": 1.417437608285584,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 22404
+    },
+    {
+      "epoch": 0.22405,
+      "grad_norm": 1.214467297847527,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 22405
+    },
+    {
+      "epoch": 0.22406,
+      "grad_norm": 1.2655828429918896,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 22406
+    },
+    {
+      "epoch": 0.22407,
+      "grad_norm": 1.2807299406081147,
+      "learning_rate": 0.003,
+      "loss": 3.9844,
+      "step": 22407
+    },
+    {
+      "epoch": 0.22408,
+      "grad_norm": 1.4001984222435222,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 22408
+    },
+    {
+      "epoch": 0.22409,
+      "grad_norm": 1.2629557533614575,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 22409
+    },
+    {
+      "epoch": 0.2241,
+      "grad_norm": 1.200000168693719,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 22410
+    },
+    {
+      "epoch": 0.22411,
+      "grad_norm": 1.2001706899646096,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 22411
+    },
+    {
+      "epoch": 0.22412,
+      "grad_norm": 1.4694144713477935,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 22412
+    },
+    {
+      "epoch": 0.22413,
+      "grad_norm": 1.366599912439857,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 22413
+    },
+    {
+      "epoch": 0.22414,
+      "grad_norm": 1.3157083625460972,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 22414
+    },
+    {
+      "epoch": 0.22415,
+      "grad_norm": 1.1571896476668302,
+      "learning_rate": 0.003,
+      "loss": 3.9726,
+      "step": 22415
+    },
+    {
+      "epoch": 0.22416,
+      "grad_norm": 1.130733212981995,
+      "learning_rate": 0.003,
+      "loss": 3.9615,
+      "step": 22416
+    },
+    {
+      "epoch": 0.22417,
+      "grad_norm": 1.2412211884685174,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 22417
+    },
+    {
+      "epoch": 0.22418,
+      "grad_norm": 1.3563457232829983,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 22418
+    },
+    {
+      "epoch": 0.22419,
+      "grad_norm": 1.3182546504124966,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 22419
+    },
+    {
+      "epoch": 0.2242,
+      "grad_norm": 1.1984212576852058,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 22420
+    },
+    {
+      "epoch": 0.22421,
+      "grad_norm": 1.310225416348663,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 22421
+    },
+    {
+      "epoch": 0.22422,
+      "grad_norm": 1.1628377943551342,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 22422
+    },
+    {
+      "epoch": 0.22423,
+      "grad_norm": 1.365695453914373,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 22423
+    },
+    {
+      "epoch": 0.22424,
+      "grad_norm": 1.2720724681725184,
+      "learning_rate": 0.003,
+      "loss": 3.9704,
+      "step": 22424
+    },
+    {
+      "epoch": 0.22425,
+      "grad_norm": 1.389185012324819,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 22425
+    },
+    {
+      "epoch": 0.22426,
+      "grad_norm": 1.2532943438726174,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 22426
+    },
+    {
+      "epoch": 0.22427,
+      "grad_norm": 1.3295070441954238,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 22427
+    },
+    {
+      "epoch": 0.22428,
+      "grad_norm": 1.1661339698134536,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 22428
+    },
+    {
+      "epoch": 0.22429,
+      "grad_norm": 1.3973863445250432,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 22429
+    },
+    {
+      "epoch": 0.2243,
+      "grad_norm": 1.2266041730155126,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 22430
+    },
+    {
+      "epoch": 0.22431,
+      "grad_norm": 1.3520543270221528,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 22431
+    },
+    {
+      "epoch": 0.22432,
+      "grad_norm": 1.37711800407905,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 22432
+    },
+    {
+      "epoch": 0.22433,
+      "grad_norm": 1.3713845857291262,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 22433
+    },
+    {
+      "epoch": 0.22434,
+      "grad_norm": 1.344708250729736,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 22434
+    },
+    {
+      "epoch": 0.22435,
+      "grad_norm": 1.122923122719212,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 22435
+    },
+    {
+      "epoch": 0.22436,
+      "grad_norm": 1.1893832074012671,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 22436
+    },
+    {
+      "epoch": 0.22437,
+      "grad_norm": 1.2445833453932735,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 22437
+    },
+    {
+      "epoch": 0.22438,
+      "grad_norm": 1.2911061181141115,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 22438
+    },
+    {
+      "epoch": 0.22439,
+      "grad_norm": 1.2976441725000534,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 22439
+    },
+    {
+      "epoch": 0.2244,
+      "grad_norm": 1.275199670531111,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 22440
+    },
+    {
+      "epoch": 0.22441,
+      "grad_norm": 1.3286306334363798,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 22441
+    },
+    {
+      "epoch": 0.22442,
+      "grad_norm": 1.5258677859344327,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 22442
+    },
+    {
+      "epoch": 0.22443,
+      "grad_norm": 1.1856767741238596,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 22443
+    },
+    {
+      "epoch": 0.22444,
+      "grad_norm": 1.3745317578574252,
+      "learning_rate": 0.003,
+      "loss": 3.9747,
+      "step": 22444
+    },
+    {
+      "epoch": 0.22445,
+      "grad_norm": 1.2256446949279602,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 22445
+    },
+    {
+      "epoch": 0.22446,
+      "grad_norm": 1.5224800245742083,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 22446
+    },
+    {
+      "epoch": 0.22447,
+      "grad_norm": 1.0313899642118451,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 22447
+    },
+    {
+      "epoch": 0.22448,
+      "grad_norm": 1.4191230825081844,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 22448
+    },
+    {
+      "epoch": 0.22449,
+      "grad_norm": 1.1025429579736072,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 22449
+    },
+    {
+      "epoch": 0.2245,
+      "grad_norm": 1.4551588004763092,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 22450
+    },
+    {
+      "epoch": 0.22451,
+      "grad_norm": 1.1998171320836486,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 22451
+    },
+    {
+      "epoch": 0.22452,
+      "grad_norm": 1.255319558444541,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 22452
+    },
+    {
+      "epoch": 0.22453,
+      "grad_norm": 1.2118062726362522,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 22453
+    },
+    {
+      "epoch": 0.22454,
+      "grad_norm": 1.5996481680619214,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 22454
+    },
+    {
+      "epoch": 0.22455,
+      "grad_norm": 1.1568720442606577,
+      "learning_rate": 0.003,
+      "loss": 3.9719,
+      "step": 22455
+    },
+    {
+      "epoch": 0.22456,
+      "grad_norm": 1.350925002219499,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 22456
+    },
+    {
+      "epoch": 0.22457,
+      "grad_norm": 1.1335171631678922,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 22457
+    },
+    {
+      "epoch": 0.22458,
+      "grad_norm": 1.6030059977756823,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 22458
+    },
+    {
+      "epoch": 0.22459,
+      "grad_norm": 1.1896706228631284,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 22459
+    },
+    {
+      "epoch": 0.2246,
+      "grad_norm": 1.1637706684056774,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 22460
+    },
+    {
+      "epoch": 0.22461,
+      "grad_norm": 1.4054579231980726,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 22461
+    },
+    {
+      "epoch": 0.22462,
+      "grad_norm": 1.2177931693348178,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 22462
+    },
+    {
+      "epoch": 0.22463,
+      "grad_norm": 1.2394789043585155,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 22463
+    },
+    {
+      "epoch": 0.22464,
+      "grad_norm": 1.2005540580186418,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 22464
+    },
+    {
+      "epoch": 0.22465,
+      "grad_norm": 1.376820967640807,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 22465
+    },
+    {
+      "epoch": 0.22466,
+      "grad_norm": 1.2994628352723128,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 22466
+    },
+    {
+      "epoch": 0.22467,
+      "grad_norm": 1.135369224433705,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 22467
+    },
+    {
+      "epoch": 0.22468,
+      "grad_norm": 1.2599822671537686,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 22468
+    },
+    {
+      "epoch": 0.22469,
+      "grad_norm": 1.3112131782627057,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 22469
+    },
+    {
+      "epoch": 0.2247,
+      "grad_norm": 1.1405945856957036,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 22470
+    },
+    {
+      "epoch": 0.22471,
+      "grad_norm": 1.2782022339444397,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 22471
+    },
+    {
+      "epoch": 0.22472,
+      "grad_norm": 1.2693499943997877,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 22472
+    },
+    {
+      "epoch": 0.22473,
+      "grad_norm": 1.3382644757203557,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 22473
+    },
+    {
+      "epoch": 0.22474,
+      "grad_norm": 1.3400217319563612,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 22474
+    },
+    {
+      "epoch": 0.22475,
+      "grad_norm": 1.1618899121794695,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 22475
+    },
+    {
+      "epoch": 0.22476,
+      "grad_norm": 1.3722859194312897,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 22476
+    },
+    {
+      "epoch": 0.22477,
+      "grad_norm": 1.2665372743766887,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 22477
+    },
+    {
+      "epoch": 0.22478,
+      "grad_norm": 1.2951349384607629,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 22478
+    },
+    {
+      "epoch": 0.22479,
+      "grad_norm": 1.326646206709094,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 22479
+    },
+    {
+      "epoch": 0.2248,
+      "grad_norm": 1.3054024241059476,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 22480
+    },
+    {
+      "epoch": 0.22481,
+      "grad_norm": 1.188585068088655,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 22481
+    },
+    {
+      "epoch": 0.22482,
+      "grad_norm": 1.2119798246473172,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 22482
+    },
+    {
+      "epoch": 0.22483,
+      "grad_norm": 1.2127305864896916,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 22483
+    },
+    {
+      "epoch": 0.22484,
+      "grad_norm": 1.2436380272366911,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 22484
+    },
+    {
+      "epoch": 0.22485,
+      "grad_norm": 1.1334257597349076,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 22485
+    },
+    {
+      "epoch": 0.22486,
+      "grad_norm": 1.2995765364014777,
+      "learning_rate": 0.003,
+      "loss": 4.0306,
+      "step": 22486
+    },
+    {
+      "epoch": 0.22487,
+      "grad_norm": 1.378797398835341,
+      "learning_rate": 0.003,
+      "loss": 4.0192,
+      "step": 22487
+    },
+    {
+      "epoch": 0.22488,
+      "grad_norm": 1.2199453701568161,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 22488
+    },
+    {
+      "epoch": 0.22489,
+      "grad_norm": 1.4776554518736094,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 22489
+    },
+    {
+      "epoch": 0.2249,
+      "grad_norm": 1.4028316476649385,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 22490
+    },
+    {
+      "epoch": 0.22491,
+      "grad_norm": 1.180291365617104,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 22491
+    },
+    {
+      "epoch": 0.22492,
+      "grad_norm": 1.4937122272676258,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 22492
+    },
+    {
+      "epoch": 0.22493,
+      "grad_norm": 1.107028016409526,
+      "learning_rate": 0.003,
+      "loss": 3.9661,
+      "step": 22493
+    },
+    {
+      "epoch": 0.22494,
+      "grad_norm": 1.3354709386431685,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 22494
+    },
+    {
+      "epoch": 0.22495,
+      "grad_norm": 1.1432176786757509,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 22495
+    },
+    {
+      "epoch": 0.22496,
+      "grad_norm": 1.4562995707502615,
+      "learning_rate": 0.003,
+      "loss": 4.0431,
+      "step": 22496
+    },
+    {
+      "epoch": 0.22497,
+      "grad_norm": 1.0688464701860052,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 22497
+    },
+    {
+      "epoch": 0.22498,
+      "grad_norm": 1.4801101639795715,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 22498
+    },
+    {
+      "epoch": 0.22499,
+      "grad_norm": 1.2806999040899756,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 22499
+    },
+    {
+      "epoch": 0.225,
+      "grad_norm": 1.0817634577540134,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 22500
+    },
+    {
+      "epoch": 0.22501,
+      "grad_norm": 1.2843604203945191,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 22501
+    },
+    {
+      "epoch": 0.22502,
+      "grad_norm": 1.2667296050947727,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 22502
+    },
+    {
+      "epoch": 0.22503,
+      "grad_norm": 1.1668549636608758,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 22503
+    },
+    {
+      "epoch": 0.22504,
+      "grad_norm": 1.3355331388831415,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 22504
+    },
+    {
+      "epoch": 0.22505,
+      "grad_norm": 1.0516139840981271,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 22505
+    },
+    {
+      "epoch": 0.22506,
+      "grad_norm": 1.2657884767565837,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 22506
+    },
+    {
+      "epoch": 0.22507,
+      "grad_norm": 1.1069515007216801,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 22507
+    },
+    {
+      "epoch": 0.22508,
+      "grad_norm": 1.4797685514032357,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 22508
+    },
+    {
+      "epoch": 0.22509,
+      "grad_norm": 1.3331713095821027,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 22509
+    },
+    {
+      "epoch": 0.2251,
+      "grad_norm": 1.325695469874847,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 22510
+    },
+    {
+      "epoch": 0.22511,
+      "grad_norm": 1.2735346279001936,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 22511
+    },
+    {
+      "epoch": 0.22512,
+      "grad_norm": 1.4033627302463723,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 22512
+    },
+    {
+      "epoch": 0.22513,
+      "grad_norm": 1.3205744061198856,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 22513
+    },
+    {
+      "epoch": 0.22514,
+      "grad_norm": 1.2786976067044686,
+      "learning_rate": 0.003,
+      "loss": 3.972,
+      "step": 22514
+    },
+    {
+      "epoch": 0.22515,
+      "grad_norm": 1.1967183198625957,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 22515
+    },
+    {
+      "epoch": 0.22516,
+      "grad_norm": 1.3560343394849093,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 22516
+    },
+    {
+      "epoch": 0.22517,
+      "grad_norm": 1.189433701202624,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 22517
+    },
+    {
+      "epoch": 0.22518,
+      "grad_norm": 1.2774695542402534,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 22518
+    },
+    {
+      "epoch": 0.22519,
+      "grad_norm": 1.2494959746096168,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 22519
+    },
+    {
+      "epoch": 0.2252,
+      "grad_norm": 1.3637157744362591,
+      "learning_rate": 0.003,
+      "loss": 3.9702,
+      "step": 22520
+    },
+    {
+      "epoch": 0.22521,
+      "grad_norm": 1.2974566367114249,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 22521
+    },
+    {
+      "epoch": 0.22522,
+      "grad_norm": 1.3921122498846437,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 22522
+    },
+    {
+      "epoch": 0.22523,
+      "grad_norm": 1.0530428749059217,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 22523
+    },
+    {
+      "epoch": 0.22524,
+      "grad_norm": 1.1981882029088853,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 22524
+    },
+    {
+      "epoch": 0.22525,
+      "grad_norm": 1.3978282743326627,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 22525
+    },
+    {
+      "epoch": 0.22526,
+      "grad_norm": 1.1309066836035648,
+      "learning_rate": 0.003,
+      "loss": 4.0234,
+      "step": 22526
+    },
+    {
+      "epoch": 0.22527,
+      "grad_norm": 1.402120511330208,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 22527
+    },
+    {
+      "epoch": 0.22528,
+      "grad_norm": 1.193276079423319,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 22528
+    },
+    {
+      "epoch": 0.22529,
+      "grad_norm": 1.4208468301806532,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 22529
+    },
+    {
+      "epoch": 0.2253,
+      "grad_norm": 1.3068348699133714,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 22530
+    },
+    {
+      "epoch": 0.22531,
+      "grad_norm": 1.2105395465544653,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 22531
+    },
+    {
+      "epoch": 0.22532,
+      "grad_norm": 1.3476057640658015,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 22532
+    },
+    {
+      "epoch": 0.22533,
+      "grad_norm": 1.1901563536004314,
+      "learning_rate": 0.003,
+      "loss": 3.9714,
+      "step": 22533
+    },
+    {
+      "epoch": 0.22534,
+      "grad_norm": 1.480501284652599,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 22534
+    },
+    {
+      "epoch": 0.22535,
+      "grad_norm": 1.1790775331287007,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 22535
+    },
+    {
+      "epoch": 0.22536,
+      "grad_norm": 1.5324231430857598,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 22536
+    },
+    {
+      "epoch": 0.22537,
+      "grad_norm": 1.150663389226861,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 22537
+    },
+    {
+      "epoch": 0.22538,
+      "grad_norm": 1.3661009432470363,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 22538
+    },
+    {
+      "epoch": 0.22539,
+      "grad_norm": 1.2684906441009884,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 22539
+    },
+    {
+      "epoch": 0.2254,
+      "grad_norm": 1.1352216713885313,
+      "learning_rate": 0.003,
+      "loss": 3.988,
+      "step": 22540
+    },
+    {
+      "epoch": 0.22541,
+      "grad_norm": 1.407756237105997,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 22541
+    },
+    {
+      "epoch": 0.22542,
+      "grad_norm": 1.2123626575369322,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 22542
+    },
+    {
+      "epoch": 0.22543,
+      "grad_norm": 1.4018773576403527,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 22543
+    },
+    {
+      "epoch": 0.22544,
+      "grad_norm": 1.012553287215869,
+      "learning_rate": 0.003,
+      "loss": 3.9801,
+      "step": 22544
+    },
+    {
+      "epoch": 0.22545,
+      "grad_norm": 1.2819276736771017,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 22545
+    },
+    {
+      "epoch": 0.22546,
+      "grad_norm": 1.2256899276499758,
+      "learning_rate": 0.003,
+      "loss": 3.9749,
+      "step": 22546
+    },
+    {
+      "epoch": 0.22547,
+      "grad_norm": 1.2367069129173276,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 22547
+    },
+    {
+      "epoch": 0.22548,
+      "grad_norm": 1.3011635132460417,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 22548
+    },
+    {
+      "epoch": 0.22549,
+      "grad_norm": 1.1949052284290673,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 22549
+    },
+    {
+      "epoch": 0.2255,
+      "grad_norm": 1.3735955982482284,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 22550
+    },
+    {
+      "epoch": 0.22551,
+      "grad_norm": 1.2330551299434023,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 22551
+    },
+    {
+      "epoch": 0.22552,
+      "grad_norm": 1.314844716656654,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 22552
+    },
+    {
+      "epoch": 0.22553,
+      "grad_norm": 1.1822744599875548,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 22553
+    },
+    {
+      "epoch": 0.22554,
+      "grad_norm": 1.261341409461104,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 22554
+    },
+    {
+      "epoch": 0.22555,
+      "grad_norm": 1.3312845734067662,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 22555
+    },
+    {
+      "epoch": 0.22556,
+      "grad_norm": 1.4750707717625615,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 22556
+    },
+    {
+      "epoch": 0.22557,
+      "grad_norm": 1.2851941954440056,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 22557
+    },
+    {
+      "epoch": 0.22558,
+      "grad_norm": 1.6197261487705839,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 22558
+    },
+    {
+      "epoch": 0.22559,
+      "grad_norm": 1.1350629273430315,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 22559
+    },
+    {
+      "epoch": 0.2256,
+      "grad_norm": 1.3432054674670533,
+      "learning_rate": 0.003,
+      "loss": 4.0347,
+      "step": 22560
+    },
+    {
+      "epoch": 0.22561,
+      "grad_norm": 1.1518616379470412,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 22561
+    },
+    {
+      "epoch": 0.22562,
+      "grad_norm": 1.7018484859224896,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 22562
+    },
+    {
+      "epoch": 0.22563,
+      "grad_norm": 1.3328459301516782,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 22563
+    },
+    {
+      "epoch": 0.22564,
+      "grad_norm": 1.2761301635040434,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 22564
+    },
+    {
+      "epoch": 0.22565,
+      "grad_norm": 1.2785560852895093,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 22565
+    },
+    {
+      "epoch": 0.22566,
+      "grad_norm": 1.2427043449987374,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 22566
+    },
+    {
+      "epoch": 0.22567,
+      "grad_norm": 1.2908230903627478,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 22567
+    },
+    {
+      "epoch": 0.22568,
+      "grad_norm": 1.365953390780153,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 22568
+    },
+    {
+      "epoch": 0.22569,
+      "grad_norm": 1.41559439937492,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 22569
+    },
+    {
+      "epoch": 0.2257,
+      "grad_norm": 1.3972146758020818,
+      "learning_rate": 0.003,
+      "loss": 3.972,
+      "step": 22570
+    },
+    {
+      "epoch": 0.22571,
+      "grad_norm": 1.3999232329901161,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 22571
+    },
+    {
+      "epoch": 0.22572,
+      "grad_norm": 1.0708162190561128,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 22572
+    },
+    {
+      "epoch": 0.22573,
+      "grad_norm": 1.4612954154755544,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 22573
+    },
+    {
+      "epoch": 0.22574,
+      "grad_norm": 1.1309516830080055,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 22574
+    },
+    {
+      "epoch": 0.22575,
+      "grad_norm": 1.5195621080772024,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 22575
+    },
+    {
+      "epoch": 0.22576,
+      "grad_norm": 1.0250110237725385,
+      "learning_rate": 0.003,
+      "loss": 3.9774,
+      "step": 22576
+    },
+    {
+      "epoch": 0.22577,
+      "grad_norm": 1.5801665822419138,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 22577
+    },
+    {
+      "epoch": 0.22578,
+      "grad_norm": 1.0389749983974395,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 22578
+    },
+    {
+      "epoch": 0.22579,
+      "grad_norm": 1.5112914155231072,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 22579
+    },
+    {
+      "epoch": 0.2258,
+      "grad_norm": 1.0720968964380941,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 22580
+    },
+    {
+      "epoch": 0.22581,
+      "grad_norm": 1.241106827345514,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 22581
+    },
+    {
+      "epoch": 0.22582,
+      "grad_norm": 1.2506515871657455,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 22582
+    },
+    {
+      "epoch": 0.22583,
+      "grad_norm": 1.384275451855507,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 22583
+    },
+    {
+      "epoch": 0.22584,
+      "grad_norm": 1.128291891005495,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 22584
+    },
+    {
+      "epoch": 0.22585,
+      "grad_norm": 1.2812055095464967,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 22585
+    },
+    {
+      "epoch": 0.22586,
+      "grad_norm": 1.218863938109246,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 22586
+    },
+    {
+      "epoch": 0.22587,
+      "grad_norm": 1.295842345107824,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 22587
+    },
+    {
+      "epoch": 0.22588,
+      "grad_norm": 1.1444092431768942,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 22588
+    },
+    {
+      "epoch": 0.22589,
+      "grad_norm": 1.4820917294361375,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 22589
+    },
+    {
+      "epoch": 0.2259,
+      "grad_norm": 1.3195833351344128,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 22590
+    },
+    {
+      "epoch": 0.22591,
+      "grad_norm": 1.1824542718186903,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 22591
+    },
+    {
+      "epoch": 0.22592,
+      "grad_norm": 1.3947996822324285,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 22592
+    },
+    {
+      "epoch": 0.22593,
+      "grad_norm": 1.465298742816563,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 22593
+    },
+    {
+      "epoch": 0.22594,
+      "grad_norm": 1.2852371538931704,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 22594
+    },
+    {
+      "epoch": 0.22595,
+      "grad_norm": 1.184029159158004,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 22595
+    },
+    {
+      "epoch": 0.22596,
+      "grad_norm": 1.1856415404760932,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 22596
+    },
+    {
+      "epoch": 0.22597,
+      "grad_norm": 1.3559871791289722,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 22597
+    },
+    {
+      "epoch": 0.22598,
+      "grad_norm": 1.3398357796124907,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 22598
+    },
+    {
+      "epoch": 0.22599,
+      "grad_norm": 1.523370787323789,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 22599
+    },
+    {
+      "epoch": 0.226,
+      "grad_norm": 1.2531432765178037,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 22600
+    },
+    {
+      "epoch": 0.22601,
+      "grad_norm": 1.143294642361714,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 22601
+    },
+    {
+      "epoch": 0.22602,
+      "grad_norm": 1.3169963541376843,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 22602
+    },
+    {
+      "epoch": 0.22603,
+      "grad_norm": 1.4222987505508788,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 22603
+    },
+    {
+      "epoch": 0.22604,
+      "grad_norm": 1.2706998123354118,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 22604
+    },
+    {
+      "epoch": 0.22605,
+      "grad_norm": 1.2187559012257394,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 22605
+    },
+    {
+      "epoch": 0.22606,
+      "grad_norm": 1.3872550597309237,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 22606
+    },
+    {
+      "epoch": 0.22607,
+      "grad_norm": 1.154436274123666,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 22607
+    },
+    {
+      "epoch": 0.22608,
+      "grad_norm": 1.312451210717472,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 22608
+    },
+    {
+      "epoch": 0.22609,
+      "grad_norm": 1.261350458196559,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 22609
+    },
+    {
+      "epoch": 0.2261,
+      "grad_norm": 1.3203880332558129,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 22610
+    },
+    {
+      "epoch": 0.22611,
+      "grad_norm": 1.1236509757293136,
+      "learning_rate": 0.003,
+      "loss": 3.9654,
+      "step": 22611
+    },
+    {
+      "epoch": 0.22612,
+      "grad_norm": 1.516190333816063,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 22612
+    },
+    {
+      "epoch": 0.22613,
+      "grad_norm": 1.1536265021848324,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 22613
+    },
+    {
+      "epoch": 0.22614,
+      "grad_norm": 1.510650175065345,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 22614
+    },
+    {
+      "epoch": 0.22615,
+      "grad_norm": 1.2024622150866966,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 22615
+    },
+    {
+      "epoch": 0.22616,
+      "grad_norm": 1.3077553086997582,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 22616
+    },
+    {
+      "epoch": 0.22617,
+      "grad_norm": 1.3713083510427642,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 22617
+    },
+    {
+      "epoch": 0.22618,
+      "grad_norm": 1.302385148835563,
+      "learning_rate": 0.003,
+      "loss": 4.0372,
+      "step": 22618
+    },
+    {
+      "epoch": 0.22619,
+      "grad_norm": 1.3502416243820476,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 22619
+    },
+    {
+      "epoch": 0.2262,
+      "grad_norm": 1.1416749597625964,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 22620
+    },
+    {
+      "epoch": 0.22621,
+      "grad_norm": 1.3896846242144427,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 22621
+    },
+    {
+      "epoch": 0.22622,
+      "grad_norm": 1.1686628491282742,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 22622
+    },
+    {
+      "epoch": 0.22623,
+      "grad_norm": 1.1366618720437742,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 22623
+    },
+    {
+      "epoch": 0.22624,
+      "grad_norm": 1.2338838257084186,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 22624
+    },
+    {
+      "epoch": 0.22625,
+      "grad_norm": 1.362431330249743,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 22625
+    },
+    {
+      "epoch": 0.22626,
+      "grad_norm": 1.28955499422325,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 22626
+    },
+    {
+      "epoch": 0.22627,
+      "grad_norm": 1.7146963290827013,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 22627
+    },
+    {
+      "epoch": 0.22628,
+      "grad_norm": 1.0291134014104808,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 22628
+    },
+    {
+      "epoch": 0.22629,
+      "grad_norm": 1.3694763736833087,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 22629
+    },
+    {
+      "epoch": 0.2263,
+      "grad_norm": 1.344749896454369,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 22630
+    },
+    {
+      "epoch": 0.22631,
+      "grad_norm": 1.3487502694733515,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 22631
+    },
+    {
+      "epoch": 0.22632,
+      "grad_norm": 1.3339342473622937,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 22632
+    },
+    {
+      "epoch": 0.22633,
+      "grad_norm": 1.288092097888253,
+      "learning_rate": 0.003,
+      "loss": 4.0345,
+      "step": 22633
+    },
+    {
+      "epoch": 0.22634,
+      "grad_norm": 1.3827756773928728,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 22634
+    },
+    {
+      "epoch": 0.22635,
+      "grad_norm": 1.3180999257311339,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 22635
+    },
+    {
+      "epoch": 0.22636,
+      "grad_norm": 1.1915745714720076,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 22636
+    },
+    {
+      "epoch": 0.22637,
+      "grad_norm": 1.2526184464767351,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 22637
+    },
+    {
+      "epoch": 0.22638,
+      "grad_norm": 1.3421117077385427,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 22638
+    },
+    {
+      "epoch": 0.22639,
+      "grad_norm": 1.0806047936653145,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 22639
+    },
+    {
+      "epoch": 0.2264,
+      "grad_norm": 1.681711614665692,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 22640
+    },
+    {
+      "epoch": 0.22641,
+      "grad_norm": 1.0514764874336213,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 22641
+    },
+    {
+      "epoch": 0.22642,
+      "grad_norm": 1.4319170581123186,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 22642
+    },
+    {
+      "epoch": 0.22643,
+      "grad_norm": 1.1123355463603788,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 22643
+    },
+    {
+      "epoch": 0.22644,
+      "grad_norm": 1.52155418318171,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 22644
+    },
+    {
+      "epoch": 0.22645,
+      "grad_norm": 1.1352406373618085,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 22645
+    },
+    {
+      "epoch": 0.22646,
+      "grad_norm": 1.2290977060489594,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 22646
+    },
+    {
+      "epoch": 0.22647,
+      "grad_norm": 1.3277620476486571,
+      "learning_rate": 0.003,
+      "loss": 3.9782,
+      "step": 22647
+    },
+    {
+      "epoch": 0.22648,
+      "grad_norm": 1.0636001330176454,
+      "learning_rate": 0.003,
+      "loss": 4.0464,
+      "step": 22648
+    },
+    {
+      "epoch": 0.22649,
+      "grad_norm": 1.3711566913833808,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 22649
+    },
+    {
+      "epoch": 0.2265,
+      "grad_norm": 1.1612570101359865,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 22650
+    },
+    {
+      "epoch": 0.22651,
+      "grad_norm": 1.2849569421749862,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 22651
+    },
+    {
+      "epoch": 0.22652,
+      "grad_norm": 1.2747061899172882,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 22652
+    },
+    {
+      "epoch": 0.22653,
+      "grad_norm": 1.3756234290752092,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 22653
+    },
+    {
+      "epoch": 0.22654,
+      "grad_norm": 1.224635382035314,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 22654
+    },
+    {
+      "epoch": 0.22655,
+      "grad_norm": 1.322687108173434,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 22655
+    },
+    {
+      "epoch": 0.22656,
+      "grad_norm": 1.211523466768888,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 22656
+    },
+    {
+      "epoch": 0.22657,
+      "grad_norm": 1.3941544934936156,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 22657
+    },
+    {
+      "epoch": 0.22658,
+      "grad_norm": 1.03253008654033,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 22658
+    },
+    {
+      "epoch": 0.22659,
+      "grad_norm": 1.4165446622659172,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 22659
+    },
+    {
+      "epoch": 0.2266,
+      "grad_norm": 1.1166518829056626,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 22660
+    },
+    {
+      "epoch": 0.22661,
+      "grad_norm": 1.329738440577359,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 22661
+    },
+    {
+      "epoch": 0.22662,
+      "grad_norm": 1.339974032292337,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 22662
+    },
+    {
+      "epoch": 0.22663,
+      "grad_norm": 1.3420308328724613,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 22663
+    },
+    {
+      "epoch": 0.22664,
+      "grad_norm": 1.7404996508213633,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 22664
+    },
+    {
+      "epoch": 0.22665,
+      "grad_norm": 1.1168402894042904,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 22665
+    },
+    {
+      "epoch": 0.22666,
+      "grad_norm": 1.3510222486668735,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 22666
+    },
+    {
+      "epoch": 0.22667,
+      "grad_norm": 1.5358046354153914,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 22667
+    },
+    {
+      "epoch": 0.22668,
+      "grad_norm": 1.0820371583145305,
+      "learning_rate": 0.003,
+      "loss": 3.9804,
+      "step": 22668
+    },
+    {
+      "epoch": 0.22669,
+      "grad_norm": 1.5502182147457255,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 22669
+    },
+    {
+      "epoch": 0.2267,
+      "grad_norm": 1.1833555666865883,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 22670
+    },
+    {
+      "epoch": 0.22671,
+      "grad_norm": 1.3153302888051472,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 22671
+    },
+    {
+      "epoch": 0.22672,
+      "grad_norm": 1.3509195079713994,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 22672
+    },
+    {
+      "epoch": 0.22673,
+      "grad_norm": 1.1767025840581729,
+      "learning_rate": 0.003,
+      "loss": 3.9713,
+      "step": 22673
+    },
+    {
+      "epoch": 0.22674,
+      "grad_norm": 1.401166177304556,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 22674
+    },
+    {
+      "epoch": 0.22675,
+      "grad_norm": 1.208076602090131,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 22675
+    },
+    {
+      "epoch": 0.22676,
+      "grad_norm": 1.3703163665668583,
+      "learning_rate": 0.003,
+      "loss": 3.9968,
+      "step": 22676
+    },
+    {
+      "epoch": 0.22677,
+      "grad_norm": 1.1119238405214589,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 22677
+    },
+    {
+      "epoch": 0.22678,
+      "grad_norm": 1.3176088296069648,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 22678
+    },
+    {
+      "epoch": 0.22679,
+      "grad_norm": 1.0025180273112697,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 22679
+    },
+    {
+      "epoch": 0.2268,
+      "grad_norm": 1.622093486398929,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 22680
+    },
+    {
+      "epoch": 0.22681,
+      "grad_norm": 1.0796495265920687,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 22681
+    },
+    {
+      "epoch": 0.22682,
+      "grad_norm": 1.458255102300656,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 22682
+    },
+    {
+      "epoch": 0.22683,
+      "grad_norm": 1.1863767853885696,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 22683
+    },
+    {
+      "epoch": 0.22684,
+      "grad_norm": 1.4155131305168378,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 22684
+    },
+    {
+      "epoch": 0.22685,
+      "grad_norm": 1.2519010111671314,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 22685
+    },
+    {
+      "epoch": 0.22686,
+      "grad_norm": 1.325647042144286,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 22686
+    },
+    {
+      "epoch": 0.22687,
+      "grad_norm": 1.2927877885850034,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 22687
+    },
+    {
+      "epoch": 0.22688,
+      "grad_norm": 1.4142299805194503,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 22688
+    },
+    {
+      "epoch": 0.22689,
+      "grad_norm": 1.0213531880511948,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 22689
+    },
+    {
+      "epoch": 0.2269,
+      "grad_norm": 1.439566420734954,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 22690
+    },
+    {
+      "epoch": 0.22691,
+      "grad_norm": 1.1493651608460327,
+      "learning_rate": 0.003,
+      "loss": 4.0382,
+      "step": 22691
+    },
+    {
+      "epoch": 0.22692,
+      "grad_norm": 1.2329940903779464,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 22692
+    },
+    {
+      "epoch": 0.22693,
+      "grad_norm": 1.1119939554651543,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 22693
+    },
+    {
+      "epoch": 0.22694,
+      "grad_norm": 1.3452540902010601,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 22694
+    },
+    {
+      "epoch": 0.22695,
+      "grad_norm": 1.2276457733559367,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 22695
+    },
+    {
+      "epoch": 0.22696,
+      "grad_norm": 1.2232044632210122,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 22696
+    },
+    {
+      "epoch": 0.22697,
+      "grad_norm": 1.092705683566609,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 22697
+    },
+    {
+      "epoch": 0.22698,
+      "grad_norm": 1.4952685263370296,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 22698
+    },
+    {
+      "epoch": 0.22699,
+      "grad_norm": 1.2916916079493808,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 22699
+    },
+    {
+      "epoch": 0.227,
+      "grad_norm": 1.5561136938456746,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 22700
+    },
+    {
+      "epoch": 0.22701,
+      "grad_norm": 1.089198568191241,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 22701
+    },
+    {
+      "epoch": 0.22702,
+      "grad_norm": 1.1667141875785318,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 22702
+    },
+    {
+      "epoch": 0.22703,
+      "grad_norm": 1.367908714713294,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 22703
+    },
+    {
+      "epoch": 0.22704,
+      "grad_norm": 1.3122276285520316,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 22704
+    },
+    {
+      "epoch": 0.22705,
+      "grad_norm": 1.1676473420160018,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 22705
+    },
+    {
+      "epoch": 0.22706,
+      "grad_norm": 1.2765880187055063,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 22706
+    },
+    {
+      "epoch": 0.22707,
+      "grad_norm": 1.1672732123680687,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 22707
+    },
+    {
+      "epoch": 0.22708,
+      "grad_norm": 1.4299937417422413,
+      "learning_rate": 0.003,
+      "loss": 3.9679,
+      "step": 22708
+    },
+    {
+      "epoch": 0.22709,
+      "grad_norm": 1.1259478081382068,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 22709
+    },
+    {
+      "epoch": 0.2271,
+      "grad_norm": 1.4214091919953824,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 22710
+    },
+    {
+      "epoch": 0.22711,
+      "grad_norm": 1.1926596322877276,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 22711
+    },
+    {
+      "epoch": 0.22712,
+      "grad_norm": 1.4080530469812624,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 22712
+    },
+    {
+      "epoch": 0.22713,
+      "grad_norm": 1.277048343967381,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 22713
+    },
+    {
+      "epoch": 0.22714,
+      "grad_norm": 1.2275804333775504,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 22714
+    },
+    {
+      "epoch": 0.22715,
+      "grad_norm": 1.516484480071817,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 22715
+    },
+    {
+      "epoch": 0.22716,
+      "grad_norm": 1.4161698830130096,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 22716
+    },
+    {
+      "epoch": 0.22717,
+      "grad_norm": 1.1868902238898147,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 22717
+    },
+    {
+      "epoch": 0.22718,
+      "grad_norm": 1.238807394779925,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 22718
+    },
+    {
+      "epoch": 0.22719,
+      "grad_norm": 1.3244501090164655,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 22719
+    },
+    {
+      "epoch": 0.2272,
+      "grad_norm": 1.429126986961252,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 22720
+    },
+    {
+      "epoch": 0.22721,
+      "grad_norm": 1.191024433896257,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 22721
+    },
+    {
+      "epoch": 0.22722,
+      "grad_norm": 1.4788473156121986,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 22722
+    },
+    {
+      "epoch": 0.22723,
+      "grad_norm": 0.9480239215806185,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 22723
+    },
+    {
+      "epoch": 0.22724,
+      "grad_norm": 1.3369517113877827,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 22724
+    },
+    {
+      "epoch": 0.22725,
+      "grad_norm": 1.2987791469120658,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 22725
+    },
+    {
+      "epoch": 0.22726,
+      "grad_norm": 1.319096280570226,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 22726
+    },
+    {
+      "epoch": 0.22727,
+      "grad_norm": 0.996933811249999,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 22727
+    },
+    {
+      "epoch": 0.22728,
+      "grad_norm": 1.480822375034368,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 22728
+    },
+    {
+      "epoch": 0.22729,
+      "grad_norm": 1.1032672456592745,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 22729
+    },
+    {
+      "epoch": 0.2273,
+      "grad_norm": 1.5166424066586064,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 22730
+    },
+    {
+      "epoch": 0.22731,
+      "grad_norm": 1.2183091089535036,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 22731
+    },
+    {
+      "epoch": 0.22732,
+      "grad_norm": 1.4044727565582245,
+      "learning_rate": 0.003,
+      "loss": 4.0317,
+      "step": 22732
+    },
+    {
+      "epoch": 0.22733,
+      "grad_norm": 1.360224076345397,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 22733
+    },
+    {
+      "epoch": 0.22734,
+      "grad_norm": 1.1131519252837947,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 22734
+    },
+    {
+      "epoch": 0.22735,
+      "grad_norm": 1.3309712499293276,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 22735
+    },
+    {
+      "epoch": 0.22736,
+      "grad_norm": 1.2211104832637052,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 22736
+    },
+    {
+      "epoch": 0.22737,
+      "grad_norm": 1.315777349617868,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 22737
+    },
+    {
+      "epoch": 0.22738,
+      "grad_norm": 1.1229049250699499,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 22738
+    },
+    {
+      "epoch": 0.22739,
+      "grad_norm": 1.4508994692471764,
+      "learning_rate": 0.003,
+      "loss": 4.0396,
+      "step": 22739
+    },
+    {
+      "epoch": 0.2274,
+      "grad_norm": 1.1296209105617934,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 22740
+    },
+    {
+      "epoch": 0.22741,
+      "grad_norm": 1.5814147830118586,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 22741
+    },
+    {
+      "epoch": 0.22742,
+      "grad_norm": 1.244280088074894,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 22742
+    },
+    {
+      "epoch": 0.22743,
+      "grad_norm": 1.3771878277840803,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 22743
+    },
+    {
+      "epoch": 0.22744,
+      "grad_norm": 1.0938646572428499,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 22744
+    },
+    {
+      "epoch": 0.22745,
+      "grad_norm": 1.6885280821316384,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 22745
+    },
+    {
+      "epoch": 0.22746,
+      "grad_norm": 1.2561041739303762,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 22746
+    },
+    {
+      "epoch": 0.22747,
+      "grad_norm": 1.3866286215842691,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 22747
+    },
+    {
+      "epoch": 0.22748,
+      "grad_norm": 1.180524357618422,
+      "learning_rate": 0.003,
+      "loss": 3.9803,
+      "step": 22748
+    },
+    {
+      "epoch": 0.22749,
+      "grad_norm": 1.325929964568727,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 22749
+    },
+    {
+      "epoch": 0.2275,
+      "grad_norm": 1.2224316623480622,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 22750
+    },
+    {
+      "epoch": 0.22751,
+      "grad_norm": 1.3109630892786908,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 22751
+    },
+    {
+      "epoch": 0.22752,
+      "grad_norm": 1.2636036183934982,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 22752
+    },
+    {
+      "epoch": 0.22753,
+      "grad_norm": 1.3533393580968458,
+      "learning_rate": 0.003,
+      "loss": 3.9688,
+      "step": 22753
+    },
+    {
+      "epoch": 0.22754,
+      "grad_norm": 1.5045177587006389,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 22754
+    },
+    {
+      "epoch": 0.22755,
+      "grad_norm": 1.4323337839677488,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 22755
+    },
+    {
+      "epoch": 0.22756,
+      "grad_norm": 1.221787308114852,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 22756
+    },
+    {
+      "epoch": 0.22757,
+      "grad_norm": 1.0876844285689602,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 22757
+    },
+    {
+      "epoch": 0.22758,
+      "grad_norm": 1.414620316731395,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 22758
+    },
+    {
+      "epoch": 0.22759,
+      "grad_norm": 1.0328304338321266,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 22759
+    },
+    {
+      "epoch": 0.2276,
+      "grad_norm": 1.4628365968610932,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 22760
+    },
+    {
+      "epoch": 0.22761,
+      "grad_norm": 1.2817754831478003,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 22761
+    },
+    {
+      "epoch": 0.22762,
+      "grad_norm": 1.1938995748149848,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 22762
+    },
+    {
+      "epoch": 0.22763,
+      "grad_norm": 1.3621430145835665,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 22763
+    },
+    {
+      "epoch": 0.22764,
+      "grad_norm": 1.1485114604817948,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 22764
+    },
+    {
+      "epoch": 0.22765,
+      "grad_norm": 1.2149396519901305,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 22765
+    },
+    {
+      "epoch": 0.22766,
+      "grad_norm": 1.1898689189185576,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 22766
+    },
+    {
+      "epoch": 0.22767,
+      "grad_norm": 1.5825649403769346,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 22767
+    },
+    {
+      "epoch": 0.22768,
+      "grad_norm": 1.0051115497847125,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 22768
+    },
+    {
+      "epoch": 0.22769,
+      "grad_norm": 1.4798658185655087,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 22769
+    },
+    {
+      "epoch": 0.2277,
+      "grad_norm": 1.272460394555835,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 22770
+    },
+    {
+      "epoch": 0.22771,
+      "grad_norm": 1.2295290717464704,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 22771
+    },
+    {
+      "epoch": 0.22772,
+      "grad_norm": 1.3010301018455468,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 22772
+    },
+    {
+      "epoch": 0.22773,
+      "grad_norm": 1.360084570754532,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 22773
+    },
+    {
+      "epoch": 0.22774,
+      "grad_norm": 1.3810630101246375,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 22774
+    },
+    {
+      "epoch": 0.22775,
+      "grad_norm": 1.2490185285535607,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 22775
+    },
+    {
+      "epoch": 0.22776,
+      "grad_norm": 1.316094824869895,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 22776
+    },
+    {
+      "epoch": 0.22777,
+      "grad_norm": 1.0935187339694499,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 22777
+    },
+    {
+      "epoch": 0.22778,
+      "grad_norm": 1.3636755997242829,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 22778
+    },
+    {
+      "epoch": 0.22779,
+      "grad_norm": 1.4046544075301732,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 22779
+    },
+    {
+      "epoch": 0.2278,
+      "grad_norm": 1.138528330728008,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 22780
+    },
+    {
+      "epoch": 0.22781,
+      "grad_norm": 1.4035572190800416,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 22781
+    },
+    {
+      "epoch": 0.22782,
+      "grad_norm": 1.225482448077427,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 22782
+    },
+    {
+      "epoch": 0.22783,
+      "grad_norm": 1.336110439176084,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 22783
+    },
+    {
+      "epoch": 0.22784,
+      "grad_norm": 1.2352250782641265,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 22784
+    },
+    {
+      "epoch": 0.22785,
+      "grad_norm": 1.2880238440120177,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 22785
+    },
+    {
+      "epoch": 0.22786,
+      "grad_norm": 1.2365842646430607,
+      "learning_rate": 0.003,
+      "loss": 3.9623,
+      "step": 22786
+    },
+    {
+      "epoch": 0.22787,
+      "grad_norm": 1.1775246177798386,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 22787
+    },
+    {
+      "epoch": 0.22788,
+      "grad_norm": 1.3552082518559576,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 22788
+    },
+    {
+      "epoch": 0.22789,
+      "grad_norm": 1.2440743984519547,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 22789
+    },
+    {
+      "epoch": 0.2279,
+      "grad_norm": 1.4526980493121227,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 22790
+    },
+    {
+      "epoch": 0.22791,
+      "grad_norm": 1.2376577863670084,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 22791
+    },
+    {
+      "epoch": 0.22792,
+      "grad_norm": 1.1957332778745147,
+      "learning_rate": 0.003,
+      "loss": 3.9832,
+      "step": 22792
+    },
+    {
+      "epoch": 0.22793,
+      "grad_norm": 1.319609375118359,
+      "learning_rate": 0.003,
+      "loss": 3.9725,
+      "step": 22793
+    },
+    {
+      "epoch": 0.22794,
+      "grad_norm": 1.0217913840247175,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 22794
+    },
+    {
+      "epoch": 0.22795,
+      "grad_norm": 1.3868513507403712,
+      "learning_rate": 0.003,
+      "loss": 4.0543,
+      "step": 22795
+    },
+    {
+      "epoch": 0.22796,
+      "grad_norm": 1.0914213545388554,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 22796
+    },
+    {
+      "epoch": 0.22797,
+      "grad_norm": 1.3911119927153326,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 22797
+    },
+    {
+      "epoch": 0.22798,
+      "grad_norm": 1.3768223117572398,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 22798
+    },
+    {
+      "epoch": 0.22799,
+      "grad_norm": 1.113786777072013,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 22799
+    },
+    {
+      "epoch": 0.228,
+      "grad_norm": 1.4685965350496215,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 22800
+    },
+    {
+      "epoch": 0.22801,
+      "grad_norm": 1.0282444937554793,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 22801
+    },
+    {
+      "epoch": 0.22802,
+      "grad_norm": 1.4126466335694734,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 22802
+    },
+    {
+      "epoch": 0.22803,
+      "grad_norm": 1.231298317673242,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 22803
+    },
+    {
+      "epoch": 0.22804,
+      "grad_norm": 1.543613864181843,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 22804
+    },
+    {
+      "epoch": 0.22805,
+      "grad_norm": 1.1763218352543272,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 22805
+    },
+    {
+      "epoch": 0.22806,
+      "grad_norm": 1.641364788025431,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 22806
+    },
+    {
+      "epoch": 0.22807,
+      "grad_norm": 1.2107982523033802,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 22807
+    },
+    {
+      "epoch": 0.22808,
+      "grad_norm": 1.285641234778508,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 22808
+    },
+    {
+      "epoch": 0.22809,
+      "grad_norm": 1.255289328207901,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 22809
+    },
+    {
+      "epoch": 0.2281,
+      "grad_norm": 1.2053008370013256,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 22810
+    },
+    {
+      "epoch": 0.22811,
+      "grad_norm": 1.3908372041860104,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 22811
+    },
+    {
+      "epoch": 0.22812,
+      "grad_norm": 1.1187132580175196,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 22812
+    },
+    {
+      "epoch": 0.22813,
+      "grad_norm": 1.5075767585649071,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 22813
+    },
+    {
+      "epoch": 0.22814,
+      "grad_norm": 1.021010131919982,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 22814
+    },
+    {
+      "epoch": 0.22815,
+      "grad_norm": 1.7595364513170932,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 22815
+    },
+    {
+      "epoch": 0.22816,
+      "grad_norm": 0.94600182204242,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 22816
+    },
+    {
+      "epoch": 0.22817,
+      "grad_norm": 1.473202665684118,
+      "learning_rate": 0.003,
+      "loss": 3.9847,
+      "step": 22817
+    },
+    {
+      "epoch": 0.22818,
+      "grad_norm": 1.5491610834041847,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 22818
+    },
+    {
+      "epoch": 0.22819,
+      "grad_norm": 1.1369931042909434,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 22819
+    },
+    {
+      "epoch": 0.2282,
+      "grad_norm": 1.582713658728168,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 22820
+    },
+    {
+      "epoch": 0.22821,
+      "grad_norm": 0.9019998499723918,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 22821
+    },
+    {
+      "epoch": 0.22822,
+      "grad_norm": 1.3137835488140233,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 22822
+    },
+    {
+      "epoch": 0.22823,
+      "grad_norm": 1.3355205858405856,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 22823
+    },
+    {
+      "epoch": 0.22824,
+      "grad_norm": 1.1745092654778806,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 22824
+    },
+    {
+      "epoch": 0.22825,
+      "grad_norm": 1.30069204852341,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 22825
+    },
+    {
+      "epoch": 0.22826,
+      "grad_norm": 1.166789266483824,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 22826
+    },
+    {
+      "epoch": 0.22827,
+      "grad_norm": 1.5331511217922593,
+      "learning_rate": 0.003,
+      "loss": 4.0357,
+      "step": 22827
+    },
+    {
+      "epoch": 0.22828,
+      "grad_norm": 0.8837673616290268,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 22828
+    },
+    {
+      "epoch": 0.22829,
+      "grad_norm": 1.3951058014975326,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 22829
+    },
+    {
+      "epoch": 0.2283,
+      "grad_norm": 1.3141551435894296,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 22830
+    },
+    {
+      "epoch": 0.22831,
+      "grad_norm": 1.3179459675015366,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 22831
+    },
+    {
+      "epoch": 0.22832,
+      "grad_norm": 1.41403340431125,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 22832
+    },
+    {
+      "epoch": 0.22833,
+      "grad_norm": 1.0613079166796122,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 22833
+    },
+    {
+      "epoch": 0.22834,
+      "grad_norm": 1.1729904958564557,
+      "learning_rate": 0.003,
+      "loss": 4.0285,
+      "step": 22834
+    },
+    {
+      "epoch": 0.22835,
+      "grad_norm": 1.3704998471808278,
+      "learning_rate": 0.003,
+      "loss": 3.973,
+      "step": 22835
+    },
+    {
+      "epoch": 0.22836,
+      "grad_norm": 1.0685683183547479,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 22836
+    },
+    {
+      "epoch": 0.22837,
+      "grad_norm": 1.6361079070392925,
+      "learning_rate": 0.003,
+      "loss": 4.0284,
+      "step": 22837
+    },
+    {
+      "epoch": 0.22838,
+      "grad_norm": 0.9916557090948457,
+      "learning_rate": 0.003,
+      "loss": 4.0508,
+      "step": 22838
+    },
+    {
+      "epoch": 0.22839,
+      "grad_norm": 1.4974741565573721,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 22839
+    },
+    {
+      "epoch": 0.2284,
+      "grad_norm": 1.0943633085811661,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 22840
+    },
+    {
+      "epoch": 0.22841,
+      "grad_norm": 1.2696797465771614,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 22841
+    },
+    {
+      "epoch": 0.22842,
+      "grad_norm": 1.1399021557114701,
+      "learning_rate": 0.003,
+      "loss": 3.9822,
+      "step": 22842
+    },
+    {
+      "epoch": 0.22843,
+      "grad_norm": 1.0560671345647084,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 22843
+    },
+    {
+      "epoch": 0.22844,
+      "grad_norm": 1.2234565265101744,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 22844
+    },
+    {
+      "epoch": 0.22845,
+      "grad_norm": 1.3501042925854136,
+      "learning_rate": 0.003,
+      "loss": 3.981,
+      "step": 22845
+    },
+    {
+      "epoch": 0.22846,
+      "grad_norm": 1.389607450387172,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 22846
+    },
+    {
+      "epoch": 0.22847,
+      "grad_norm": 1.317723722756291,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 22847
+    },
+    {
+      "epoch": 0.22848,
+      "grad_norm": 1.4116310747151186,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 22848
+    },
+    {
+      "epoch": 0.22849,
+      "grad_norm": 1.2659497099628325,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 22849
+    },
+    {
+      "epoch": 0.2285,
+      "grad_norm": 1.1979647864507643,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 22850
+    },
+    {
+      "epoch": 0.22851,
+      "grad_norm": 1.3454571824942103,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 22851
+    },
+    {
+      "epoch": 0.22852,
+      "grad_norm": 1.1241110583115308,
+      "learning_rate": 0.003,
+      "loss": 3.9734,
+      "step": 22852
+    },
+    {
+      "epoch": 0.22853,
+      "grad_norm": 1.4249823606450511,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 22853
+    },
+    {
+      "epoch": 0.22854,
+      "grad_norm": 1.2361611991863812,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 22854
+    },
+    {
+      "epoch": 0.22855,
+      "grad_norm": 1.4165650620078218,
+      "learning_rate": 0.003,
+      "loss": 4.0367,
+      "step": 22855
+    },
+    {
+      "epoch": 0.22856,
+      "grad_norm": 1.2624221121043417,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 22856
+    },
+    {
+      "epoch": 0.22857,
+      "grad_norm": 1.4732298970746118,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 22857
+    },
+    {
+      "epoch": 0.22858,
+      "grad_norm": 1.3395552573869922,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 22858
+    },
+    {
+      "epoch": 0.22859,
+      "grad_norm": 1.3208447554594442,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 22859
+    },
+    {
+      "epoch": 0.2286,
+      "grad_norm": 1.344329462289818,
+      "learning_rate": 0.003,
+      "loss": 4.0332,
+      "step": 22860
+    },
+    {
+      "epoch": 0.22861,
+      "grad_norm": 1.2962470130507053,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 22861
+    },
+    {
+      "epoch": 0.22862,
+      "grad_norm": 1.0949085274573522,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 22862
+    },
+    {
+      "epoch": 0.22863,
+      "grad_norm": 1.3300177469928192,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 22863
+    },
+    {
+      "epoch": 0.22864,
+      "grad_norm": 1.3578589249110868,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 22864
+    },
+    {
+      "epoch": 0.22865,
+      "grad_norm": 1.1987202877881173,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 22865
+    },
+    {
+      "epoch": 0.22866,
+      "grad_norm": 1.410898337901932,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 22866
+    },
+    {
+      "epoch": 0.22867,
+      "grad_norm": 1.031504534328135,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 22867
+    },
+    {
+      "epoch": 0.22868,
+      "grad_norm": 1.4841855258400767,
+      "learning_rate": 0.003,
+      "loss": 4.0425,
+      "step": 22868
+    },
+    {
+      "epoch": 0.22869,
+      "grad_norm": 1.3799669634832519,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 22869
+    },
+    {
+      "epoch": 0.2287,
+      "grad_norm": 1.4476413555535221,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 22870
+    },
+    {
+      "epoch": 0.22871,
+      "grad_norm": 1.3090909314127972,
+      "learning_rate": 0.003,
+      "loss": 3.9801,
+      "step": 22871
+    },
+    {
+      "epoch": 0.22872,
+      "grad_norm": 1.290808610032038,
+      "learning_rate": 0.003,
+      "loss": 3.9782,
+      "step": 22872
+    },
+    {
+      "epoch": 0.22873,
+      "grad_norm": 1.3226407073968094,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 22873
+    },
+    {
+      "epoch": 0.22874,
+      "grad_norm": 1.139736109575469,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 22874
+    },
+    {
+      "epoch": 0.22875,
+      "grad_norm": 1.255673620519553,
+      "learning_rate": 0.003,
+      "loss": 3.9678,
+      "step": 22875
+    },
+    {
+      "epoch": 0.22876,
+      "grad_norm": 1.2970898294237163,
+      "learning_rate": 0.003,
+      "loss": 3.969,
+      "step": 22876
+    },
+    {
+      "epoch": 0.22877,
+      "grad_norm": 1.3235384362764837,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 22877
+    },
+    {
+      "epoch": 0.22878,
+      "grad_norm": 1.1838965063678764,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 22878
+    },
+    {
+      "epoch": 0.22879,
+      "grad_norm": 1.3443917224670625,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 22879
+    },
+    {
+      "epoch": 0.2288,
+      "grad_norm": 1.1651856068023982,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 22880
+    },
+    {
+      "epoch": 0.22881,
+      "grad_norm": 1.5518064030667784,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 22881
+    },
+    {
+      "epoch": 0.22882,
+      "grad_norm": 1.067594910460725,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 22882
+    },
+    {
+      "epoch": 0.22883,
+      "grad_norm": 1.2557755210771828,
+      "learning_rate": 0.003,
+      "loss": 3.9772,
+      "step": 22883
+    },
+    {
+      "epoch": 0.22884,
+      "grad_norm": 1.1076929091018506,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 22884
+    },
+    {
+      "epoch": 0.22885,
+      "grad_norm": 1.2638300689860762,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 22885
+    },
+    {
+      "epoch": 0.22886,
+      "grad_norm": 1.3992691032596258,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 22886
+    },
+    {
+      "epoch": 0.22887,
+      "grad_norm": 1.3726045099022606,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 22887
+    },
+    {
+      "epoch": 0.22888,
+      "grad_norm": 1.0701488442322062,
+      "learning_rate": 0.003,
+      "loss": 3.9698,
+      "step": 22888
+    },
+    {
+      "epoch": 0.22889,
+      "grad_norm": 1.3851299840286484,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 22889
+    },
+    {
+      "epoch": 0.2289,
+      "grad_norm": 1.295117216707045,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 22890
+    },
+    {
+      "epoch": 0.22891,
+      "grad_norm": 1.332459571357105,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 22891
+    },
+    {
+      "epoch": 0.22892,
+      "grad_norm": 1.2007319907063327,
+      "learning_rate": 0.003,
+      "loss": 3.9731,
+      "step": 22892
+    },
+    {
+      "epoch": 0.22893,
+      "grad_norm": 1.1826756123994586,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 22893
+    },
+    {
+      "epoch": 0.22894,
+      "grad_norm": 1.0678605193904154,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 22894
+    },
+    {
+      "epoch": 0.22895,
+      "grad_norm": 1.3450510196592982,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 22895
+    },
+    {
+      "epoch": 0.22896,
+      "grad_norm": 1.1926262274039583,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 22896
+    },
+    {
+      "epoch": 0.22897,
+      "grad_norm": 1.305571030255802,
+      "learning_rate": 0.003,
+      "loss": 3.9693,
+      "step": 22897
+    },
+    {
+      "epoch": 0.22898,
+      "grad_norm": 1.3108533277300807,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 22898
+    },
+    {
+      "epoch": 0.22899,
+      "grad_norm": 1.2735918978038039,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 22899
+    },
+    {
+      "epoch": 0.229,
+      "grad_norm": 1.0807280457880712,
+      "learning_rate": 0.003,
+      "loss": 3.9808,
+      "step": 22900
+    },
+    {
+      "epoch": 0.22901,
+      "grad_norm": 1.2709077519337413,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 22901
+    },
+    {
+      "epoch": 0.22902,
+      "grad_norm": 1.229348935716512,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 22902
+    },
+    {
+      "epoch": 0.22903,
+      "grad_norm": 1.2385626777892655,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 22903
+    },
+    {
+      "epoch": 0.22904,
+      "grad_norm": 1.1283864596983078,
+      "learning_rate": 0.003,
+      "loss": 3.965,
+      "step": 22904
+    },
+    {
+      "epoch": 0.22905,
+      "grad_norm": 1.7608466727173815,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 22905
+    },
+    {
+      "epoch": 0.22906,
+      "grad_norm": 1.089156786008958,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 22906
+    },
+    {
+      "epoch": 0.22907,
+      "grad_norm": 1.6459305022070918,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 22907
+    },
+    {
+      "epoch": 0.22908,
+      "grad_norm": 1.2018303017334457,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 22908
+    },
+    {
+      "epoch": 0.22909,
+      "grad_norm": 1.5568250286710077,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 22909
+    },
+    {
+      "epoch": 0.2291,
+      "grad_norm": 1.348772578484843,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 22910
+    },
+    {
+      "epoch": 0.22911,
+      "grad_norm": 1.3879857527401456,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 22911
+    },
+    {
+      "epoch": 0.22912,
+      "grad_norm": 1.1929037856087004,
+      "learning_rate": 0.003,
+      "loss": 4.0335,
+      "step": 22912
+    },
+    {
+      "epoch": 0.22913,
+      "grad_norm": 1.1651459667906923,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 22913
+    },
+    {
+      "epoch": 0.22914,
+      "grad_norm": 1.1677524878896797,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 22914
+    },
+    {
+      "epoch": 0.22915,
+      "grad_norm": 1.2876402074051705,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 22915
+    },
+    {
+      "epoch": 0.22916,
+      "grad_norm": 1.3507795719137674,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 22916
+    },
+    {
+      "epoch": 0.22917,
+      "grad_norm": 1.2118417938436439,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 22917
+    },
+    {
+      "epoch": 0.22918,
+      "grad_norm": 1.573368322886035,
+      "learning_rate": 0.003,
+      "loss": 4.032,
+      "step": 22918
+    },
+    {
+      "epoch": 0.22919,
+      "grad_norm": 1.1083239927707969,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 22919
+    },
+    {
+      "epoch": 0.2292,
+      "grad_norm": 1.2729776985931964,
+      "learning_rate": 0.003,
+      "loss": 4.0327,
+      "step": 22920
+    },
+    {
+      "epoch": 0.22921,
+      "grad_norm": 1.1904049113502344,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 22921
+    },
+    {
+      "epoch": 0.22922,
+      "grad_norm": 1.3814480367936026,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 22922
+    },
+    {
+      "epoch": 0.22923,
+      "grad_norm": 1.1820099133400894,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 22923
+    },
+    {
+      "epoch": 0.22924,
+      "grad_norm": 1.485259267540033,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 22924
+    },
+    {
+      "epoch": 0.22925,
+      "grad_norm": 1.0014227474820436,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 22925
+    },
+    {
+      "epoch": 0.22926,
+      "grad_norm": 1.6475135248890667,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 22926
+    },
+    {
+      "epoch": 0.22927,
+      "grad_norm": 0.9218377200346827,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 22927
+    },
+    {
+      "epoch": 0.22928,
+      "grad_norm": 1.2693826967744082,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 22928
+    },
+    {
+      "epoch": 0.22929,
+      "grad_norm": 1.3186363689510407,
+      "learning_rate": 0.003,
+      "loss": 4.0275,
+      "step": 22929
+    },
+    {
+      "epoch": 0.2293,
+      "grad_norm": 1.457143929674499,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 22930
+    },
+    {
+      "epoch": 0.22931,
+      "grad_norm": 1.3008452051974975,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 22931
+    },
+    {
+      "epoch": 0.22932,
+      "grad_norm": 1.449027443211409,
+      "learning_rate": 0.003,
+      "loss": 3.9759,
+      "step": 22932
+    },
+    {
+      "epoch": 0.22933,
+      "grad_norm": 1.204639228207519,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 22933
+    },
+    {
+      "epoch": 0.22934,
+      "grad_norm": 1.3688305548558848,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 22934
+    },
+    {
+      "epoch": 0.22935,
+      "grad_norm": 1.261229535869728,
+      "learning_rate": 0.003,
+      "loss": 3.9706,
+      "step": 22935
+    },
+    {
+      "epoch": 0.22936,
+      "grad_norm": 1.1825831790726533,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 22936
+    },
+    {
+      "epoch": 0.22937,
+      "grad_norm": 1.3169836649156967,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 22937
+    },
+    {
+      "epoch": 0.22938,
+      "grad_norm": 1.3762773321085373,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 22938
+    },
+    {
+      "epoch": 0.22939,
+      "grad_norm": 1.308050086113338,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 22939
+    },
+    {
+      "epoch": 0.2294,
+      "grad_norm": 1.2113331808039436,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 22940
+    },
+    {
+      "epoch": 0.22941,
+      "grad_norm": 1.1659494889690727,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 22941
+    },
+    {
+      "epoch": 0.22942,
+      "grad_norm": 1.298820278352854,
+      "learning_rate": 0.003,
+      "loss": 3.964,
+      "step": 22942
+    },
+    {
+      "epoch": 0.22943,
+      "grad_norm": 1.2679729052849942,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 22943
+    },
+    {
+      "epoch": 0.22944,
+      "grad_norm": 1.4568311930004374,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 22944
+    },
+    {
+      "epoch": 0.22945,
+      "grad_norm": 1.3217142400097326,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 22945
+    },
+    {
+      "epoch": 0.22946,
+      "grad_norm": 1.4198696039174268,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 22946
+    },
+    {
+      "epoch": 0.22947,
+      "grad_norm": 1.4346316347244223,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 22947
+    },
+    {
+      "epoch": 0.22948,
+      "grad_norm": 1.278990237132488,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 22948
+    },
+    {
+      "epoch": 0.22949,
+      "grad_norm": 1.1720761761139131,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 22949
+    },
+    {
+      "epoch": 0.2295,
+      "grad_norm": 1.395529801521344,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 22950
+    },
+    {
+      "epoch": 0.22951,
+      "grad_norm": 1.1270967860569157,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 22951
+    },
+    {
+      "epoch": 0.22952,
+      "grad_norm": 1.3190851196448452,
+      "learning_rate": 0.003,
+      "loss": 4.0313,
+      "step": 22952
+    },
+    {
+      "epoch": 0.22953,
+      "grad_norm": 1.0030462323668292,
+      "learning_rate": 0.003,
+      "loss": 3.9763,
+      "step": 22953
+    },
+    {
+      "epoch": 0.22954,
+      "grad_norm": 1.3397699933909168,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 22954
+    },
+    {
+      "epoch": 0.22955,
+      "grad_norm": 1.0111819141108243,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 22955
+    },
+    {
+      "epoch": 0.22956,
+      "grad_norm": 1.418696053642676,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 22956
+    },
+    {
+      "epoch": 0.22957,
+      "grad_norm": 1.076479587259374,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 22957
+    },
+    {
+      "epoch": 0.22958,
+      "grad_norm": 1.4877672744241417,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 22958
+    },
+    {
+      "epoch": 0.22959,
+      "grad_norm": 1.2192911862573945,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 22959
+    },
+    {
+      "epoch": 0.2296,
+      "grad_norm": 1.2585460407509144,
+      "learning_rate": 0.003,
+      "loss": 3.947,
+      "step": 22960
+    },
+    {
+      "epoch": 0.22961,
+      "grad_norm": 1.4390063519275438,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 22961
+    },
+    {
+      "epoch": 0.22962,
+      "grad_norm": 1.1746081100871908,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 22962
+    },
+    {
+      "epoch": 0.22963,
+      "grad_norm": 1.1531452669053521,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 22963
+    },
+    {
+      "epoch": 0.22964,
+      "grad_norm": 1.4245282754541115,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 22964
+    },
+    {
+      "epoch": 0.22965,
+      "grad_norm": 1.2259710704275566,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 22965
+    },
+    {
+      "epoch": 0.22966,
+      "grad_norm": 1.3200216581068887,
+      "learning_rate": 0.003,
+      "loss": 3.988,
+      "step": 22966
+    },
+    {
+      "epoch": 0.22967,
+      "grad_norm": 1.1608949261278827,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 22967
+    },
+    {
+      "epoch": 0.22968,
+      "grad_norm": 1.403277657237164,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 22968
+    },
+    {
+      "epoch": 0.22969,
+      "grad_norm": 1.2985422808440694,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 22969
+    },
+    {
+      "epoch": 0.2297,
+      "grad_norm": 1.4432063398143462,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 22970
+    },
+    {
+      "epoch": 0.22971,
+      "grad_norm": 1.1428641773997934,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 22971
+    },
+    {
+      "epoch": 0.22972,
+      "grad_norm": 1.672495233043559,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 22972
+    },
+    {
+      "epoch": 0.22973,
+      "grad_norm": 1.247073990337511,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 22973
+    },
+    {
+      "epoch": 0.22974,
+      "grad_norm": 1.3983095006568693,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 22974
+    },
+    {
+      "epoch": 0.22975,
+      "grad_norm": 1.2805376486243776,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 22975
+    },
+    {
+      "epoch": 0.22976,
+      "grad_norm": 1.4026578922464308,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 22976
+    },
+    {
+      "epoch": 0.22977,
+      "grad_norm": 1.0788189347713157,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 22977
+    },
+    {
+      "epoch": 0.22978,
+      "grad_norm": 1.2043014278781752,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 22978
+    },
+    {
+      "epoch": 0.22979,
+      "grad_norm": 1.2061909228210865,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 22979
+    },
+    {
+      "epoch": 0.2298,
+      "grad_norm": 1.2845772482740414,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 22980
+    },
+    {
+      "epoch": 0.22981,
+      "grad_norm": 1.2480651236283458,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 22981
+    },
+    {
+      "epoch": 0.22982,
+      "grad_norm": 1.3340756567334484,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 22982
+    },
+    {
+      "epoch": 0.22983,
+      "grad_norm": 1.3867994140456184,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 22983
+    },
+    {
+      "epoch": 0.22984,
+      "grad_norm": 1.2628417960796448,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 22984
+    },
+    {
+      "epoch": 0.22985,
+      "grad_norm": 1.2235420712826732,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 22985
+    },
+    {
+      "epoch": 0.22986,
+      "grad_norm": 1.1418839950611503,
+      "learning_rate": 0.003,
+      "loss": 3.9801,
+      "step": 22986
+    },
+    {
+      "epoch": 0.22987,
+      "grad_norm": 1.4223954779882904,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 22987
+    },
+    {
+      "epoch": 0.22988,
+      "grad_norm": 1.356503880066828,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 22988
+    },
+    {
+      "epoch": 0.22989,
+      "grad_norm": 1.5311373493801153,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 22989
+    },
+    {
+      "epoch": 0.2299,
+      "grad_norm": 0.9682603300710592,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 22990
+    },
+    {
+      "epoch": 0.22991,
+      "grad_norm": 1.3261767563580835,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 22991
+    },
+    {
+      "epoch": 0.22992,
+      "grad_norm": 1.2240537974171242,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 22992
+    },
+    {
+      "epoch": 0.22993,
+      "grad_norm": 1.2198894892006664,
+      "learning_rate": 0.003,
+      "loss": 4.0421,
+      "step": 22993
+    },
+    {
+      "epoch": 0.22994,
+      "grad_norm": 1.379204191847762,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 22994
+    },
+    {
+      "epoch": 0.22995,
+      "grad_norm": 1.15422855208069,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 22995
+    },
+    {
+      "epoch": 0.22996,
+      "grad_norm": 1.4965811646148512,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 22996
+    },
+    {
+      "epoch": 0.22997,
+      "grad_norm": 1.2110027415508002,
+      "learning_rate": 0.003,
+      "loss": 3.9706,
+      "step": 22997
+    },
+    {
+      "epoch": 0.22998,
+      "grad_norm": 1.4149365378973238,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 22998
+    },
+    {
+      "epoch": 0.22999,
+      "grad_norm": 1.4210564222053095,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 22999
+    },
+    {
+      "epoch": 0.23,
+      "grad_norm": 1.450735099400525,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 23000
+    },
+    {
+      "epoch": 0.23001,
+      "grad_norm": 1.1864087666655128,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 23001
+    },
+    {
+      "epoch": 0.23002,
+      "grad_norm": 1.2546454002870988,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 23002
+    },
+    {
+      "epoch": 0.23003,
+      "grad_norm": 1.3293096371101252,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 23003
+    },
+    {
+      "epoch": 0.23004,
+      "grad_norm": 1.2627846029009142,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 23004
+    },
+    {
+      "epoch": 0.23005,
+      "grad_norm": 1.1571498502713646,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 23005
+    },
+    {
+      "epoch": 0.23006,
+      "grad_norm": 1.4436253605521503,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 23006
+    },
+    {
+      "epoch": 0.23007,
+      "grad_norm": 1.2443703006266817,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 23007
+    },
+    {
+      "epoch": 0.23008,
+      "grad_norm": 1.4995915572057086,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 23008
+    },
+    {
+      "epoch": 0.23009,
+      "grad_norm": 0.9782119237645696,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 23009
+    },
+    {
+      "epoch": 0.2301,
+      "grad_norm": 1.6199076561857437,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 23010
+    },
+    {
+      "epoch": 0.23011,
+      "grad_norm": 1.1080193282752258,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 23011
+    },
+    {
+      "epoch": 0.23012,
+      "grad_norm": 1.613409706261043,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 23012
+    },
+    {
+      "epoch": 0.23013,
+      "grad_norm": 0.9947234508633239,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 23013
+    },
+    {
+      "epoch": 0.23014,
+      "grad_norm": 1.2306614406750271,
+      "learning_rate": 0.003,
+      "loss": 3.9886,
+      "step": 23014
+    },
+    {
+      "epoch": 0.23015,
+      "grad_norm": 1.2076729308157366,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 23015
+    },
+    {
+      "epoch": 0.23016,
+      "grad_norm": 1.190682846061786,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 23016
+    },
+    {
+      "epoch": 0.23017,
+      "grad_norm": 1.3808328335545539,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 23017
+    },
+    {
+      "epoch": 0.23018,
+      "grad_norm": 1.3317786206898687,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 23018
+    },
+    {
+      "epoch": 0.23019,
+      "grad_norm": 1.268126119173935,
+      "learning_rate": 0.003,
+      "loss": 3.9824,
+      "step": 23019
+    },
+    {
+      "epoch": 0.2302,
+      "grad_norm": 1.4026696390391504,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 23020
+    },
+    {
+      "epoch": 0.23021,
+      "grad_norm": 1.1562008299841549,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 23021
+    },
+    {
+      "epoch": 0.23022,
+      "grad_norm": 1.4549086751744846,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 23022
+    },
+    {
+      "epoch": 0.23023,
+      "grad_norm": 1.162614650567156,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 23023
+    },
+    {
+      "epoch": 0.23024,
+      "grad_norm": 1.437941716438167,
+      "learning_rate": 0.003,
+      "loss": 3.9765,
+      "step": 23024
+    },
+    {
+      "epoch": 0.23025,
+      "grad_norm": 1.2735828693743627,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 23025
+    },
+    {
+      "epoch": 0.23026,
+      "grad_norm": 1.2408918953522998,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 23026
+    },
+    {
+      "epoch": 0.23027,
+      "grad_norm": 1.194659197392445,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 23027
+    },
+    {
+      "epoch": 0.23028,
+      "grad_norm": 1.3781928265111623,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 23028
+    },
+    {
+      "epoch": 0.23029,
+      "grad_norm": 1.1119072531425376,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 23029
+    },
+    {
+      "epoch": 0.2303,
+      "grad_norm": 1.3222890596320902,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 23030
+    },
+    {
+      "epoch": 0.23031,
+      "grad_norm": 1.1817980667688874,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 23031
+    },
+    {
+      "epoch": 0.23032,
+      "grad_norm": 1.618828552113822,
+      "learning_rate": 0.003,
+      "loss": 4.0103,
+      "step": 23032
+    },
+    {
+      "epoch": 0.23033,
+      "grad_norm": 0.9223978627329792,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 23033
+    },
+    {
+      "epoch": 0.23034,
+      "grad_norm": 1.102606015387404,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 23034
+    },
+    {
+      "epoch": 0.23035,
+      "grad_norm": 1.3349389284810345,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 23035
+    },
+    {
+      "epoch": 0.23036,
+      "grad_norm": 1.3818115310616315,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 23036
+    },
+    {
+      "epoch": 0.23037,
+      "grad_norm": 1.3434385229889416,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 23037
+    },
+    {
+      "epoch": 0.23038,
+      "grad_norm": 1.2731657868911725,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 23038
+    },
+    {
+      "epoch": 0.23039,
+      "grad_norm": 1.2225154149601183,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 23039
+    },
+    {
+      "epoch": 0.2304,
+      "grad_norm": 1.2191270563972854,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 23040
+    },
+    {
+      "epoch": 0.23041,
+      "grad_norm": 1.2717429730268999,
+      "learning_rate": 0.003,
+      "loss": 4.0254,
+      "step": 23041
+    },
+    {
+      "epoch": 0.23042,
+      "grad_norm": 1.3547141886282537,
+      "learning_rate": 0.003,
+      "loss": 3.9579,
+      "step": 23042
+    },
+    {
+      "epoch": 0.23043,
+      "grad_norm": 1.176574341716608,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 23043
+    },
+    {
+      "epoch": 0.23044,
+      "grad_norm": 1.102532847015589,
+      "learning_rate": 0.003,
+      "loss": 3.9787,
+      "step": 23044
+    },
+    {
+      "epoch": 0.23045,
+      "grad_norm": 1.3267603396461047,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 23045
+    },
+    {
+      "epoch": 0.23046,
+      "grad_norm": 1.2854588017202238,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 23046
+    },
+    {
+      "epoch": 0.23047,
+      "grad_norm": 1.2932561438902452,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 23047
+    },
+    {
+      "epoch": 0.23048,
+      "grad_norm": 1.306124887314224,
+      "learning_rate": 0.003,
+      "loss": 3.9832,
+      "step": 23048
+    },
+    {
+      "epoch": 0.23049,
+      "grad_norm": 1.4040126560900135,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 23049
+    },
+    {
+      "epoch": 0.2305,
+      "grad_norm": 1.2044870277686868,
+      "learning_rate": 0.003,
+      "loss": 3.9875,
+      "step": 23050
+    },
+    {
+      "epoch": 0.23051,
+      "grad_norm": 1.1663334759760853,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 23051
+    },
+    {
+      "epoch": 0.23052,
+      "grad_norm": 1.1616507838790464,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 23052
+    },
+    {
+      "epoch": 0.23053,
+      "grad_norm": 1.2339103942540934,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 23053
+    },
+    {
+      "epoch": 0.23054,
+      "grad_norm": 1.2805689374617113,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 23054
+    },
+    {
+      "epoch": 0.23055,
+      "grad_norm": 1.2888652283176338,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 23055
+    },
+    {
+      "epoch": 0.23056,
+      "grad_norm": 1.172117759920887,
+      "learning_rate": 0.003,
+      "loss": 3.9809,
+      "step": 23056
+    },
+    {
+      "epoch": 0.23057,
+      "grad_norm": 1.300250171331353,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 23057
+    },
+    {
+      "epoch": 0.23058,
+      "grad_norm": 1.1438711864536497,
+      "learning_rate": 0.003,
+      "loss": 3.9652,
+      "step": 23058
+    },
+    {
+      "epoch": 0.23059,
+      "grad_norm": 1.2677612811632857,
+      "learning_rate": 0.003,
+      "loss": 3.9751,
+      "step": 23059
+    },
+    {
+      "epoch": 0.2306,
+      "grad_norm": 1.2901775473784989,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 23060
+    },
+    {
+      "epoch": 0.23061,
+      "grad_norm": 1.2741955350799052,
+      "learning_rate": 0.003,
+      "loss": 4.0374,
+      "step": 23061
+    },
+    {
+      "epoch": 0.23062,
+      "grad_norm": 1.4740406525357705,
+      "learning_rate": 0.003,
+      "loss": 3.9786,
+      "step": 23062
+    },
+    {
+      "epoch": 0.23063,
+      "grad_norm": 1.4908251669255153,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 23063
+    },
+    {
+      "epoch": 0.23064,
+      "grad_norm": 1.1035146291497928,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 23064
+    },
+    {
+      "epoch": 0.23065,
+      "grad_norm": 1.3137142576006435,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 23065
+    },
+    {
+      "epoch": 0.23066,
+      "grad_norm": 1.358453762142197,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 23066
+    },
+    {
+      "epoch": 0.23067,
+      "grad_norm": 1.212858394576925,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 23067
+    },
+    {
+      "epoch": 0.23068,
+      "grad_norm": 1.2323602838086956,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 23068
+    },
+    {
+      "epoch": 0.23069,
+      "grad_norm": 1.314132354083005,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 23069
+    },
+    {
+      "epoch": 0.2307,
+      "grad_norm": 1.299617119269189,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 23070
+    },
+    {
+      "epoch": 0.23071,
+      "grad_norm": 1.3071423747786404,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 23071
+    },
+    {
+      "epoch": 0.23072,
+      "grad_norm": 1.1836531520448512,
+      "learning_rate": 0.003,
+      "loss": 3.9886,
+      "step": 23072
+    },
+    {
+      "epoch": 0.23073,
+      "grad_norm": 1.345668669790869,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 23073
+    },
+    {
+      "epoch": 0.23074,
+      "grad_norm": 1.192875494128662,
+      "learning_rate": 0.003,
+      "loss": 3.9808,
+      "step": 23074
+    },
+    {
+      "epoch": 0.23075,
+      "grad_norm": 1.296694569174857,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 23075
+    },
+    {
+      "epoch": 0.23076,
+      "grad_norm": 1.3647481135349497,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 23076
+    },
+    {
+      "epoch": 0.23077,
+      "grad_norm": 1.2453868109963857,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 23077
+    },
+    {
+      "epoch": 0.23078,
+      "grad_norm": 1.278690086116135,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 23078
+    },
+    {
+      "epoch": 0.23079,
+      "grad_norm": 1.3838496999240222,
+      "learning_rate": 0.003,
+      "loss": 3.9711,
+      "step": 23079
+    },
+    {
+      "epoch": 0.2308,
+      "grad_norm": 1.3709076059104395,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 23080
+    },
+    {
+      "epoch": 0.23081,
+      "grad_norm": 1.4727674533381288,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 23081
+    },
+    {
+      "epoch": 0.23082,
+      "grad_norm": 1.1462501360335402,
+      "learning_rate": 0.003,
+      "loss": 3.9867,
+      "step": 23082
+    },
+    {
+      "epoch": 0.23083,
+      "grad_norm": 1.2316742332631416,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 23083
+    },
+    {
+      "epoch": 0.23084,
+      "grad_norm": 1.203541108409638,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 23084
+    },
+    {
+      "epoch": 0.23085,
+      "grad_norm": 1.2404573981906961,
+      "learning_rate": 0.003,
+      "loss": 3.9734,
+      "step": 23085
+    },
+    {
+      "epoch": 0.23086,
+      "grad_norm": 1.2406301905615507,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 23086
+    },
+    {
+      "epoch": 0.23087,
+      "grad_norm": 1.350519511434891,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 23087
+    },
+    {
+      "epoch": 0.23088,
+      "grad_norm": 1.2519572702777204,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 23088
+    },
+    {
+      "epoch": 0.23089,
+      "grad_norm": 1.1879901626729206,
+      "learning_rate": 0.003,
+      "loss": 3.9735,
+      "step": 23089
+    },
+    {
+      "epoch": 0.2309,
+      "grad_norm": 1.5353127005892957,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 23090
+    },
+    {
+      "epoch": 0.23091,
+      "grad_norm": 1.2189103142799262,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 23091
+    },
+    {
+      "epoch": 0.23092,
+      "grad_norm": 1.3299399415657613,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 23092
+    },
+    {
+      "epoch": 0.23093,
+      "grad_norm": 1.1532443414443996,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 23093
+    },
+    {
+      "epoch": 0.23094,
+      "grad_norm": 1.2112010312718047,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 23094
+    },
+    {
+      "epoch": 0.23095,
+      "grad_norm": 1.4145504175009473,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 23095
+    },
+    {
+      "epoch": 0.23096,
+      "grad_norm": 1.4224665752944214,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 23096
+    },
+    {
+      "epoch": 0.23097,
+      "grad_norm": 1.290862143824134,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 23097
+    },
+    {
+      "epoch": 0.23098,
+      "grad_norm": 1.6944761089121299,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 23098
+    },
+    {
+      "epoch": 0.23099,
+      "grad_norm": 0.9482243296408102,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 23099
+    },
+    {
+      "epoch": 0.231,
+      "grad_norm": 1.343014994976686,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 23100
+    },
+    {
+      "epoch": 0.23101,
+      "grad_norm": 1.1792737410390521,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 23101
+    },
+    {
+      "epoch": 0.23102,
+      "grad_norm": 1.2792944499559704,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 23102
+    },
+    {
+      "epoch": 0.23103,
+      "grad_norm": 1.3535222588645606,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 23103
+    },
+    {
+      "epoch": 0.23104,
+      "grad_norm": 1.408898680392564,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 23104
+    },
+    {
+      "epoch": 0.23105,
+      "grad_norm": 1.069988546513794,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 23105
+    },
+    {
+      "epoch": 0.23106,
+      "grad_norm": 1.3248523767164555,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 23106
+    },
+    {
+      "epoch": 0.23107,
+      "grad_norm": 1.294542894049446,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 23107
+    },
+    {
+      "epoch": 0.23108,
+      "grad_norm": 1.222309387265079,
+      "learning_rate": 0.003,
+      "loss": 3.955,
+      "step": 23108
+    },
+    {
+      "epoch": 0.23109,
+      "grad_norm": 1.1732635726511949,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 23109
+    },
+    {
+      "epoch": 0.2311,
+      "grad_norm": 1.397429694971451,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 23110
+    },
+    {
+      "epoch": 0.23111,
+      "grad_norm": 1.0776690964269424,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 23111
+    },
+    {
+      "epoch": 0.23112,
+      "grad_norm": 1.587126834329404,
+      "learning_rate": 0.003,
+      "loss": 4.0474,
+      "step": 23112
+    },
+    {
+      "epoch": 0.23113,
+      "grad_norm": 1.0987751407000188,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 23113
+    },
+    {
+      "epoch": 0.23114,
+      "grad_norm": 1.2612556981201075,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 23114
+    },
+    {
+      "epoch": 0.23115,
+      "grad_norm": 1.2361589044982908,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 23115
+    },
+    {
+      "epoch": 0.23116,
+      "grad_norm": 1.3009802195319138,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 23116
+    },
+    {
+      "epoch": 0.23117,
+      "grad_norm": 1.1083193937727676,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 23117
+    },
+    {
+      "epoch": 0.23118,
+      "grad_norm": 1.3992907872914593,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 23118
+    },
+    {
+      "epoch": 0.23119,
+      "grad_norm": 1.0556974700785156,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 23119
+    },
+    {
+      "epoch": 0.2312,
+      "grad_norm": 1.6258176302815621,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 23120
+    },
+    {
+      "epoch": 0.23121,
+      "grad_norm": 1.0170109378169692,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 23121
+    },
+    {
+      "epoch": 0.23122,
+      "grad_norm": 1.3647450658126354,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 23122
+    },
+    {
+      "epoch": 0.23123,
+      "grad_norm": 1.313305715060407,
+      "learning_rate": 0.003,
+      "loss": 4.0163,
+      "step": 23123
+    },
+    {
+      "epoch": 0.23124,
+      "grad_norm": 1.6301783372725656,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 23124
+    },
+    {
+      "epoch": 0.23125,
+      "grad_norm": 1.2300651232664603,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 23125
+    },
+    {
+      "epoch": 0.23126,
+      "grad_norm": 1.1898363634589282,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 23126
+    },
+    {
+      "epoch": 0.23127,
+      "grad_norm": 1.3180536590736793,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 23127
+    },
+    {
+      "epoch": 0.23128,
+      "grad_norm": 1.1209573452722297,
+      "learning_rate": 0.003,
+      "loss": 3.98,
+      "step": 23128
+    },
+    {
+      "epoch": 0.23129,
+      "grad_norm": 1.2259430163762792,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 23129
+    },
+    {
+      "epoch": 0.2313,
+      "grad_norm": 1.500318000843982,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 23130
+    },
+    {
+      "epoch": 0.23131,
+      "grad_norm": 1.2311152584055411,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 23131
+    },
+    {
+      "epoch": 0.23132,
+      "grad_norm": 1.3226571391861166,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 23132
+    },
+    {
+      "epoch": 0.23133,
+      "grad_norm": 1.2637931016525525,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 23133
+    },
+    {
+      "epoch": 0.23134,
+      "grad_norm": 1.4464313692139898,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 23134
+    },
+    {
+      "epoch": 0.23135,
+      "grad_norm": 1.0946950009797,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 23135
+    },
+    {
+      "epoch": 0.23136,
+      "grad_norm": 1.3127300380501867,
+      "learning_rate": 0.003,
+      "loss": 3.973,
+      "step": 23136
+    },
+    {
+      "epoch": 0.23137,
+      "grad_norm": 1.0591850218627143,
+      "learning_rate": 0.003,
+      "loss": 3.9827,
+      "step": 23137
+    },
+    {
+      "epoch": 0.23138,
+      "grad_norm": 1.1957119120490916,
+      "learning_rate": 0.003,
+      "loss": 3.9731,
+      "step": 23138
+    },
+    {
+      "epoch": 0.23139,
+      "grad_norm": 1.1280612572628501,
+      "learning_rate": 0.003,
+      "loss": 4.034,
+      "step": 23139
+    },
+    {
+      "epoch": 0.2314,
+      "grad_norm": 1.5059360515857598,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 23140
+    },
+    {
+      "epoch": 0.23141,
+      "grad_norm": 1.1432317280495057,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 23141
+    },
+    {
+      "epoch": 0.23142,
+      "grad_norm": 1.4574586825987108,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 23142
+    },
+    {
+      "epoch": 0.23143,
+      "grad_norm": 1.2094917946396397,
+      "learning_rate": 0.003,
+      "loss": 3.9701,
+      "step": 23143
+    },
+    {
+      "epoch": 0.23144,
+      "grad_norm": 1.3704885955540425,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 23144
+    },
+    {
+      "epoch": 0.23145,
+      "grad_norm": 0.9369156098239874,
+      "learning_rate": 0.003,
+      "loss": 3.9804,
+      "step": 23145
+    },
+    {
+      "epoch": 0.23146,
+      "grad_norm": 1.4346466111603497,
+      "learning_rate": 0.003,
+      "loss": 4.0403,
+      "step": 23146
+    },
+    {
+      "epoch": 0.23147,
+      "grad_norm": 1.2133795601721404,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 23147
+    },
+    {
+      "epoch": 0.23148,
+      "grad_norm": 1.600406047574792,
+      "learning_rate": 0.003,
+      "loss": 4.0519,
+      "step": 23148
+    },
+    {
+      "epoch": 0.23149,
+      "grad_norm": 1.061528486488992,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 23149
+    },
+    {
+      "epoch": 0.2315,
+      "grad_norm": 1.3641262681711535,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 23150
+    },
+    {
+      "epoch": 0.23151,
+      "grad_norm": 1.289496809982985,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 23151
+    },
+    {
+      "epoch": 0.23152,
+      "grad_norm": 1.1705485061495786,
+      "learning_rate": 0.003,
+      "loss": 3.9809,
+      "step": 23152
+    },
+    {
+      "epoch": 0.23153,
+      "grad_norm": 1.1151742491838759,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 23153
+    },
+    {
+      "epoch": 0.23154,
+      "grad_norm": 1.3081106571521177,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 23154
+    },
+    {
+      "epoch": 0.23155,
+      "grad_norm": 1.29322083005088,
+      "learning_rate": 0.003,
+      "loss": 3.9819,
+      "step": 23155
+    },
+    {
+      "epoch": 0.23156,
+      "grad_norm": 1.4133404883462242,
+      "learning_rate": 0.003,
+      "loss": 3.988,
+      "step": 23156
+    },
+    {
+      "epoch": 0.23157,
+      "grad_norm": 1.2145791477186805,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 23157
+    },
+    {
+      "epoch": 0.23158,
+      "grad_norm": 1.4339465444603625,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 23158
+    },
+    {
+      "epoch": 0.23159,
+      "grad_norm": 1.041818361740654,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 23159
+    },
+    {
+      "epoch": 0.2316,
+      "grad_norm": 1.4274959218485759,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 23160
+    },
+    {
+      "epoch": 0.23161,
+      "grad_norm": 1.157430631077555,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 23161
+    },
+    {
+      "epoch": 0.23162,
+      "grad_norm": 1.4939249749539993,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 23162
+    },
+    {
+      "epoch": 0.23163,
+      "grad_norm": 1.181093806251131,
+      "learning_rate": 0.003,
+      "loss": 4.0333,
+      "step": 23163
+    },
+    {
+      "epoch": 0.23164,
+      "grad_norm": 1.3998133032527118,
+      "learning_rate": 0.003,
+      "loss": 4.0304,
+      "step": 23164
+    },
+    {
+      "epoch": 0.23165,
+      "grad_norm": 1.2346632568558984,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 23165
+    },
+    {
+      "epoch": 0.23166,
+      "grad_norm": 1.2346125750211845,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 23166
+    },
+    {
+      "epoch": 0.23167,
+      "grad_norm": 1.5372341178651312,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 23167
+    },
+    {
+      "epoch": 0.23168,
+      "grad_norm": 1.1487506141196626,
+      "learning_rate": 0.003,
+      "loss": 3.9718,
+      "step": 23168
+    },
+    {
+      "epoch": 0.23169,
+      "grad_norm": 1.18358730262482,
+      "learning_rate": 0.003,
+      "loss": 3.9598,
+      "step": 23169
+    },
+    {
+      "epoch": 0.2317,
+      "grad_norm": 1.1148379147179075,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 23170
+    },
+    {
+      "epoch": 0.23171,
+      "grad_norm": 1.3446053111982117,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 23171
+    },
+    {
+      "epoch": 0.23172,
+      "grad_norm": 1.307297752056523,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 23172
+    },
+    {
+      "epoch": 0.23173,
+      "grad_norm": 1.354283048693125,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 23173
+    },
+    {
+      "epoch": 0.23174,
+      "grad_norm": 1.1738757598255483,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 23174
+    },
+    {
+      "epoch": 0.23175,
+      "grad_norm": 1.2069606135143631,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 23175
+    },
+    {
+      "epoch": 0.23176,
+      "grad_norm": 1.2564726897970866,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 23176
+    },
+    {
+      "epoch": 0.23177,
+      "grad_norm": 1.2097429077483923,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 23177
+    },
+    {
+      "epoch": 0.23178,
+      "grad_norm": 1.245136085572146,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 23178
+    },
+    {
+      "epoch": 0.23179,
+      "grad_norm": 1.315434913833972,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 23179
+    },
+    {
+      "epoch": 0.2318,
+      "grad_norm": 1.2320726096819925,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 23180
+    },
+    {
+      "epoch": 0.23181,
+      "grad_norm": 1.388499144616512,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 23181
+    },
+    {
+      "epoch": 0.23182,
+      "grad_norm": 1.2104977199485498,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 23182
+    },
+    {
+      "epoch": 0.23183,
+      "grad_norm": 1.2766016564086722,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 23183
+    },
+    {
+      "epoch": 0.23184,
+      "grad_norm": 1.3178030740214193,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 23184
+    },
+    {
+      "epoch": 0.23185,
+      "grad_norm": 1.1749403171503205,
+      "learning_rate": 0.003,
+      "loss": 4.0198,
+      "step": 23185
+    },
+    {
+      "epoch": 0.23186,
+      "grad_norm": 1.4760437533808894,
+      "learning_rate": 0.003,
+      "loss": 3.9826,
+      "step": 23186
+    },
+    {
+      "epoch": 0.23187,
+      "grad_norm": 1.2234664980650152,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 23187
+    },
+    {
+      "epoch": 0.23188,
+      "grad_norm": 1.2243670497430506,
+      "learning_rate": 0.003,
+      "loss": 4.0422,
+      "step": 23188
+    },
+    {
+      "epoch": 0.23189,
+      "grad_norm": 1.4569476835581894,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 23189
+    },
+    {
+      "epoch": 0.2319,
+      "grad_norm": 1.035509642068983,
+      "learning_rate": 0.003,
+      "loss": 3.9751,
+      "step": 23190
+    },
+    {
+      "epoch": 0.23191,
+      "grad_norm": 1.3461015106509648,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 23191
+    },
+    {
+      "epoch": 0.23192,
+      "grad_norm": 1.144902906835363,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 23192
+    },
+    {
+      "epoch": 0.23193,
+      "grad_norm": 1.3770860024238696,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 23193
+    },
+    {
+      "epoch": 0.23194,
+      "grad_norm": 1.2520154668644974,
+      "learning_rate": 0.003,
+      "loss": 3.9646,
+      "step": 23194
+    },
+    {
+      "epoch": 0.23195,
+      "grad_norm": 1.4197629932596654,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 23195
+    },
+    {
+      "epoch": 0.23196,
+      "grad_norm": 1.3556402225466022,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 23196
+    },
+    {
+      "epoch": 0.23197,
+      "grad_norm": 1.089252233933237,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 23197
+    },
+    {
+      "epoch": 0.23198,
+      "grad_norm": 1.3126871623273861,
+      "learning_rate": 0.003,
+      "loss": 3.9859,
+      "step": 23198
+    },
+    {
+      "epoch": 0.23199,
+      "grad_norm": 1.0964016541736261,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 23199
+    },
+    {
+      "epoch": 0.232,
+      "grad_norm": 1.394088429475266,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 23200
+    },
+    {
+      "epoch": 0.23201,
+      "grad_norm": 1.2267298960205018,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 23201
+    },
+    {
+      "epoch": 0.23202,
+      "grad_norm": 1.2949117594334543,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 23202
+    },
+    {
+      "epoch": 0.23203,
+      "grad_norm": 1.4378978252915577,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 23203
+    },
+    {
+      "epoch": 0.23204,
+      "grad_norm": 1.1985172417855132,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 23204
+    },
+    {
+      "epoch": 0.23205,
+      "grad_norm": 1.470777855319753,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 23205
+    },
+    {
+      "epoch": 0.23206,
+      "grad_norm": 1.1731962904847624,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 23206
+    },
+    {
+      "epoch": 0.23207,
+      "grad_norm": 1.21476360023474,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 23207
+    },
+    {
+      "epoch": 0.23208,
+      "grad_norm": 1.3000678263209975,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 23208
+    },
+    {
+      "epoch": 0.23209,
+      "grad_norm": 1.2352052891314182,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 23209
+    },
+    {
+      "epoch": 0.2321,
+      "grad_norm": 1.4411476265385104,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 23210
+    },
+    {
+      "epoch": 0.23211,
+      "grad_norm": 1.0731418143451206,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 23211
+    },
+    {
+      "epoch": 0.23212,
+      "grad_norm": 1.2839961966999311,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 23212
+    },
+    {
+      "epoch": 0.23213,
+      "grad_norm": 1.3036290272938849,
+      "learning_rate": 0.003,
+      "loss": 4.0338,
+      "step": 23213
+    },
+    {
+      "epoch": 0.23214,
+      "grad_norm": 1.3393311592115893,
+      "learning_rate": 0.003,
+      "loss": 3.9785,
+      "step": 23214
+    },
+    {
+      "epoch": 0.23215,
+      "grad_norm": 1.0655389596901916,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 23215
+    },
+    {
+      "epoch": 0.23216,
+      "grad_norm": 1.473801749320015,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 23216
+    },
+    {
+      "epoch": 0.23217,
+      "grad_norm": 1.1963618586321279,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 23217
+    },
+    {
+      "epoch": 0.23218,
+      "grad_norm": 1.2245537888601499,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 23218
+    },
+    {
+      "epoch": 0.23219,
+      "grad_norm": 1.3900331123417675,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 23219
+    },
+    {
+      "epoch": 0.2322,
+      "grad_norm": 1.3448607103517125,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 23220
+    },
+    {
+      "epoch": 0.23221,
+      "grad_norm": 1.4927939666311927,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 23221
+    },
+    {
+      "epoch": 0.23222,
+      "grad_norm": 1.237926273736713,
+      "learning_rate": 0.003,
+      "loss": 3.981,
+      "step": 23222
+    },
+    {
+      "epoch": 0.23223,
+      "grad_norm": 1.135826847300623,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 23223
+    },
+    {
+      "epoch": 0.23224,
+      "grad_norm": 1.3061386648020525,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 23224
+    },
+    {
+      "epoch": 0.23225,
+      "grad_norm": 1.402216882456486,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 23225
+    },
+    {
+      "epoch": 0.23226,
+      "grad_norm": 1.4907985802681607,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 23226
+    },
+    {
+      "epoch": 0.23227,
+      "grad_norm": 1.1722581146923272,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 23227
+    },
+    {
+      "epoch": 0.23228,
+      "grad_norm": 1.3446874796343076,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 23228
+    },
+    {
+      "epoch": 0.23229,
+      "grad_norm": 1.3319207907492157,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 23229
+    },
+    {
+      "epoch": 0.2323,
+      "grad_norm": 1.1811499871143765,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 23230
+    },
+    {
+      "epoch": 0.23231,
+      "grad_norm": 1.2781766819946132,
+      "learning_rate": 0.003,
+      "loss": 3.9745,
+      "step": 23231
+    },
+    {
+      "epoch": 0.23232,
+      "grad_norm": 1.2035926893322342,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 23232
+    },
+    {
+      "epoch": 0.23233,
+      "grad_norm": 1.3807459558836188,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 23233
+    },
+    {
+      "epoch": 0.23234,
+      "grad_norm": 1.3372735467191996,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 23234
+    },
+    {
+      "epoch": 0.23235,
+      "grad_norm": 1.4254573569495295,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 23235
+    },
+    {
+      "epoch": 0.23236,
+      "grad_norm": 1.1720901841275848,
+      "learning_rate": 0.003,
+      "loss": 3.9719,
+      "step": 23236
+    },
+    {
+      "epoch": 0.23237,
+      "grad_norm": 1.2123360004839725,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 23237
+    },
+    {
+      "epoch": 0.23238,
+      "grad_norm": 1.3119444768475967,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 23238
+    },
+    {
+      "epoch": 0.23239,
+      "grad_norm": 1.0779072065512623,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 23239
+    },
+    {
+      "epoch": 0.2324,
+      "grad_norm": 1.4238430345947974,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 23240
+    },
+    {
+      "epoch": 0.23241,
+      "grad_norm": 1.3341693708454467,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 23241
+    },
+    {
+      "epoch": 0.23242,
+      "grad_norm": 1.0168011507100219,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 23242
+    },
+    {
+      "epoch": 0.23243,
+      "grad_norm": 1.5074088932716563,
+      "learning_rate": 0.003,
+      "loss": 4.0262,
+      "step": 23243
+    },
+    {
+      "epoch": 0.23244,
+      "grad_norm": 1.233032908682079,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 23244
+    },
+    {
+      "epoch": 0.23245,
+      "grad_norm": 1.4272312282762945,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 23245
+    },
+    {
+      "epoch": 0.23246,
+      "grad_norm": 1.0972765472206414,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 23246
+    },
+    {
+      "epoch": 0.23247,
+      "grad_norm": 1.3676047672494858,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 23247
+    },
+    {
+      "epoch": 0.23248,
+      "grad_norm": 1.2917273795303863,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 23248
+    },
+    {
+      "epoch": 0.23249,
+      "grad_norm": 1.2985485909023766,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 23249
+    },
+    {
+      "epoch": 0.2325,
+      "grad_norm": 1.3643052327155802,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 23250
+    },
+    {
+      "epoch": 0.23251,
+      "grad_norm": 1.311722414233303,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 23251
+    },
+    {
+      "epoch": 0.23252,
+      "grad_norm": 1.242562223917677,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 23252
+    },
+    {
+      "epoch": 0.23253,
+      "grad_norm": 1.2832985471810352,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 23253
+    },
+    {
+      "epoch": 0.23254,
+      "grad_norm": 1.2518623999827168,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 23254
+    },
+    {
+      "epoch": 0.23255,
+      "grad_norm": 1.1862661915608708,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 23255
+    },
+    {
+      "epoch": 0.23256,
+      "grad_norm": 1.2555210123060103,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 23256
+    },
+    {
+      "epoch": 0.23257,
+      "grad_norm": 1.2602722146288003,
+      "learning_rate": 0.003,
+      "loss": 3.9876,
+      "step": 23257
+    },
+    {
+      "epoch": 0.23258,
+      "grad_norm": 1.1449647519781267,
+      "learning_rate": 0.003,
+      "loss": 3.9555,
+      "step": 23258
+    },
+    {
+      "epoch": 0.23259,
+      "grad_norm": 1.4780112842322748,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 23259
+    },
+    {
+      "epoch": 0.2326,
+      "grad_norm": 1.1851533075268867,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 23260
+    },
+    {
+      "epoch": 0.23261,
+      "grad_norm": 1.5747618817007045,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 23261
+    },
+    {
+      "epoch": 0.23262,
+      "grad_norm": 1.3059407097929392,
+      "learning_rate": 0.003,
+      "loss": 3.9792,
+      "step": 23262
+    },
+    {
+      "epoch": 0.23263,
+      "grad_norm": 1.3970765540319205,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 23263
+    },
+    {
+      "epoch": 0.23264,
+      "grad_norm": 1.401979057578545,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 23264
+    },
+    {
+      "epoch": 0.23265,
+      "grad_norm": 1.1026397784375817,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 23265
+    },
+    {
+      "epoch": 0.23266,
+      "grad_norm": 1.2751927721750025,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 23266
+    },
+    {
+      "epoch": 0.23267,
+      "grad_norm": 1.0525297212587106,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 23267
+    },
+    {
+      "epoch": 0.23268,
+      "grad_norm": 1.3572769271185352,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 23268
+    },
+    {
+      "epoch": 0.23269,
+      "grad_norm": 1.2010089035441744,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 23269
+    },
+    {
+      "epoch": 0.2327,
+      "grad_norm": 1.3349032762777095,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 23270
+    },
+    {
+      "epoch": 0.23271,
+      "grad_norm": 1.1995120750776047,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 23271
+    },
+    {
+      "epoch": 0.23272,
+      "grad_norm": 1.4105382038446599,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 23272
+    },
+    {
+      "epoch": 0.23273,
+      "grad_norm": 1.2152891675535769,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 23273
+    },
+    {
+      "epoch": 0.23274,
+      "grad_norm": 1.106116587279671,
+      "learning_rate": 0.003,
+      "loss": 3.9749,
+      "step": 23274
+    },
+    {
+      "epoch": 0.23275,
+      "grad_norm": 1.2066211343991395,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 23275
+    },
+    {
+      "epoch": 0.23276,
+      "grad_norm": 1.2699301524276652,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 23276
+    },
+    {
+      "epoch": 0.23277,
+      "grad_norm": 1.309196882922677,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 23277
+    },
+    {
+      "epoch": 0.23278,
+      "grad_norm": 1.1434333095198757,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 23278
+    },
+    {
+      "epoch": 0.23279,
+      "grad_norm": 1.6069516416807048,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 23279
+    },
+    {
+      "epoch": 0.2328,
+      "grad_norm": 1.1916678562762946,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 23280
+    },
+    {
+      "epoch": 0.23281,
+      "grad_norm": 1.3511713083472952,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 23281
+    },
+    {
+      "epoch": 0.23282,
+      "grad_norm": 1.2681480186037453,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 23282
+    },
+    {
+      "epoch": 0.23283,
+      "grad_norm": 1.5997769691036945,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 23283
+    },
+    {
+      "epoch": 0.23284,
+      "grad_norm": 1.1824469483432183,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 23284
+    },
+    {
+      "epoch": 0.23285,
+      "grad_norm": 1.3227477503330156,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 23285
+    },
+    {
+      "epoch": 0.23286,
+      "grad_norm": 1.2268678737435863,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 23286
+    },
+    {
+      "epoch": 0.23287,
+      "grad_norm": 1.2188194931728351,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 23287
+    },
+    {
+      "epoch": 0.23288,
+      "grad_norm": 1.6069376913450655,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 23288
+    },
+    {
+      "epoch": 0.23289,
+      "grad_norm": 1.2680174952878345,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 23289
+    },
+    {
+      "epoch": 0.2329,
+      "grad_norm": 1.4862999116171163,
+      "learning_rate": 0.003,
+      "loss": 4.0339,
+      "step": 23290
+    },
+    {
+      "epoch": 0.23291,
+      "grad_norm": 1.0623097695126578,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 23291
+    },
+    {
+      "epoch": 0.23292,
+      "grad_norm": 1.185095495078381,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 23292
+    },
+    {
+      "epoch": 0.23293,
+      "grad_norm": 1.257552098160103,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 23293
+    },
+    {
+      "epoch": 0.23294,
+      "grad_norm": 1.4782004476257093,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 23294
+    },
+    {
+      "epoch": 0.23295,
+      "grad_norm": 1.0632782302617312,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 23295
+    },
+    {
+      "epoch": 0.23296,
+      "grad_norm": 1.3903119564091262,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 23296
+    },
+    {
+      "epoch": 0.23297,
+      "grad_norm": 1.3440698834424465,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 23297
+    },
+    {
+      "epoch": 0.23298,
+      "grad_norm": 1.3721102774354053,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 23298
+    },
+    {
+      "epoch": 0.23299,
+      "grad_norm": 1.4043133891150585,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 23299
+    },
+    {
+      "epoch": 0.233,
+      "grad_norm": 1.3457657437480448,
+      "learning_rate": 0.003,
+      "loss": 4.0434,
+      "step": 23300
+    },
+    {
+      "epoch": 0.23301,
+      "grad_norm": 1.2149424970581297,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 23301
+    },
+    {
+      "epoch": 0.23302,
+      "grad_norm": 1.2614841589995294,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 23302
+    },
+    {
+      "epoch": 0.23303,
+      "grad_norm": 1.2544682076307114,
+      "learning_rate": 0.003,
+      "loss": 3.9601,
+      "step": 23303
+    },
+    {
+      "epoch": 0.23304,
+      "grad_norm": 1.303523660697261,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 23304
+    },
+    {
+      "epoch": 0.23305,
+      "grad_norm": 1.40040912300424,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 23305
+    },
+    {
+      "epoch": 0.23306,
+      "grad_norm": 1.0829071873782161,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 23306
+    },
+    {
+      "epoch": 0.23307,
+      "grad_norm": 1.302475848609264,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 23307
+    },
+    {
+      "epoch": 0.23308,
+      "grad_norm": 1.2300080488744,
+      "learning_rate": 0.003,
+      "loss": 3.9789,
+      "step": 23308
+    },
+    {
+      "epoch": 0.23309,
+      "grad_norm": 1.4771853655470744,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 23309
+    },
+    {
+      "epoch": 0.2331,
+      "grad_norm": 1.2517283604950549,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 23310
+    },
+    {
+      "epoch": 0.23311,
+      "grad_norm": 1.1762370771940165,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 23311
+    },
+    {
+      "epoch": 0.23312,
+      "grad_norm": 1.632436318702279,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 23312
+    },
+    {
+      "epoch": 0.23313,
+      "grad_norm": 1.0773248559998647,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 23313
+    },
+    {
+      "epoch": 0.23314,
+      "grad_norm": 1.357983127716577,
+      "learning_rate": 0.003,
+      "loss": 4.0281,
+      "step": 23314
+    },
+    {
+      "epoch": 0.23315,
+      "grad_norm": 1.2996922341692567,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 23315
+    },
+    {
+      "epoch": 0.23316,
+      "grad_norm": 1.2057028115536343,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 23316
+    },
+    {
+      "epoch": 0.23317,
+      "grad_norm": 1.2381991399385153,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 23317
+    },
+    {
+      "epoch": 0.23318,
+      "grad_norm": 1.095291408240569,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 23318
+    },
+    {
+      "epoch": 0.23319,
+      "grad_norm": 1.396858977084593,
+      "learning_rate": 0.003,
+      "loss": 3.9911,
+      "step": 23319
+    },
+    {
+      "epoch": 0.2332,
+      "grad_norm": 1.4161911186653835,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 23320
+    },
+    {
+      "epoch": 0.23321,
+      "grad_norm": 1.2064716990403015,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 23321
+    },
+    {
+      "epoch": 0.23322,
+      "grad_norm": 1.3880656095075543,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 23322
+    },
+    {
+      "epoch": 0.23323,
+      "grad_norm": 1.0809259562070959,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 23323
+    },
+    {
+      "epoch": 0.23324,
+      "grad_norm": 1.2246559480694073,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 23324
+    },
+    {
+      "epoch": 0.23325,
+      "grad_norm": 1.3616219027224516,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 23325
+    },
+    {
+      "epoch": 0.23326,
+      "grad_norm": 1.1375271378960417,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 23326
+    },
+    {
+      "epoch": 0.23327,
+      "grad_norm": 1.8097750707580202,
+      "learning_rate": 0.003,
+      "loss": 3.9785,
+      "step": 23327
+    },
+    {
+      "epoch": 0.23328,
+      "grad_norm": 0.8828628352463707,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 23328
+    },
+    {
+      "epoch": 0.23329,
+      "grad_norm": 1.3609722839160816,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 23329
+    },
+    {
+      "epoch": 0.2333,
+      "grad_norm": 1.4449227842687427,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 23330
+    },
+    {
+      "epoch": 0.23331,
+      "grad_norm": 1.0376857219043731,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 23331
+    },
+    {
+      "epoch": 0.23332,
+      "grad_norm": 1.4910954574992075,
+      "learning_rate": 0.003,
+      "loss": 3.9893,
+      "step": 23332
+    },
+    {
+      "epoch": 0.23333,
+      "grad_norm": 0.9544497662541155,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 23333
+    },
+    {
+      "epoch": 0.23334,
+      "grad_norm": 1.5962407743790379,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 23334
+    },
+    {
+      "epoch": 0.23335,
+      "grad_norm": 1.1475361983917478,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 23335
+    },
+    {
+      "epoch": 0.23336,
+      "grad_norm": 1.5123244792737076,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 23336
+    },
+    {
+      "epoch": 0.23337,
+      "grad_norm": 1.364196998388134,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 23337
+    },
+    {
+      "epoch": 0.23338,
+      "grad_norm": 1.4579703820519598,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 23338
+    },
+    {
+      "epoch": 0.23339,
+      "grad_norm": 1.2694215166608196,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 23339
+    },
+    {
+      "epoch": 0.2334,
+      "grad_norm": 1.1728404627400868,
+      "learning_rate": 0.003,
+      "loss": 4.0244,
+      "step": 23340
+    },
+    {
+      "epoch": 0.23341,
+      "grad_norm": 1.2335330953816144,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 23341
+    },
+    {
+      "epoch": 0.23342,
+      "grad_norm": 1.275024472113929,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 23342
+    },
+    {
+      "epoch": 0.23343,
+      "grad_norm": 1.1325516185615472,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 23343
+    },
+    {
+      "epoch": 0.23344,
+      "grad_norm": 1.3281751245819708,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 23344
+    },
+    {
+      "epoch": 0.23345,
+      "grad_norm": 1.136978853492433,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 23345
+    },
+    {
+      "epoch": 0.23346,
+      "grad_norm": 1.5253210848863394,
+      "learning_rate": 0.003,
+      "loss": 4.0297,
+      "step": 23346
+    },
+    {
+      "epoch": 0.23347,
+      "grad_norm": 0.9464658818279054,
+      "learning_rate": 0.003,
+      "loss": 3.9558,
+      "step": 23347
+    },
+    {
+      "epoch": 0.23348,
+      "grad_norm": 1.4283589256056195,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 23348
+    },
+    {
+      "epoch": 0.23349,
+      "grad_norm": 1.254522988010052,
+      "learning_rate": 0.003,
+      "loss": 3.9667,
+      "step": 23349
+    },
+    {
+      "epoch": 0.2335,
+      "grad_norm": 1.1457854173109874,
+      "learning_rate": 0.003,
+      "loss": 3.9783,
+      "step": 23350
+    },
+    {
+      "epoch": 0.23351,
+      "grad_norm": 1.3866602768230665,
+      "learning_rate": 0.003,
+      "loss": 3.9799,
+      "step": 23351
+    },
+    {
+      "epoch": 0.23352,
+      "grad_norm": 1.1142652533481618,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 23352
+    },
+    {
+      "epoch": 0.23353,
+      "grad_norm": 1.554409031338567,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 23353
+    },
+    {
+      "epoch": 0.23354,
+      "grad_norm": 1.0085720292706517,
+      "learning_rate": 0.003,
+      "loss": 4.0228,
+      "step": 23354
+    },
+    {
+      "epoch": 0.23355,
+      "grad_norm": 1.3386686527379994,
+      "learning_rate": 0.003,
+      "loss": 3.9799,
+      "step": 23355
+    },
+    {
+      "epoch": 0.23356,
+      "grad_norm": 1.0271250192529608,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 23356
+    },
+    {
+      "epoch": 0.23357,
+      "grad_norm": 1.9804381605109573,
+      "learning_rate": 0.003,
+      "loss": 4.0365,
+      "step": 23357
+    },
+    {
+      "epoch": 0.23358,
+      "grad_norm": 1.0516068324085377,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 23358
+    },
+    {
+      "epoch": 0.23359,
+      "grad_norm": 1.641116460440755,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 23359
+    },
+    {
+      "epoch": 0.2336,
+      "grad_norm": 1.2545939040321186,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 23360
+    },
+    {
+      "epoch": 0.23361,
+      "grad_norm": 1.3035074749208606,
+      "learning_rate": 0.003,
+      "loss": 4.0379,
+      "step": 23361
+    },
+    {
+      "epoch": 0.23362,
+      "grad_norm": 1.4582108908866493,
+      "learning_rate": 0.003,
+      "loss": 4.0385,
+      "step": 23362
+    },
+    {
+      "epoch": 0.23363,
+      "grad_norm": 1.2639381840389354,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 23363
+    },
+    {
+      "epoch": 0.23364,
+      "grad_norm": 1.135717089470357,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 23364
+    },
+    {
+      "epoch": 0.23365,
+      "grad_norm": 1.2844340375101468,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 23365
+    },
+    {
+      "epoch": 0.23366,
+      "grad_norm": 1.3379000953148776,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 23366
+    },
+    {
+      "epoch": 0.23367,
+      "grad_norm": 1.0765768639565114,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 23367
+    },
+    {
+      "epoch": 0.23368,
+      "grad_norm": 1.3802805793396478,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 23368
+    },
+    {
+      "epoch": 0.23369,
+      "grad_norm": 1.063320957220296,
+      "learning_rate": 0.003,
+      "loss": 3.977,
+      "step": 23369
+    },
+    {
+      "epoch": 0.2337,
+      "grad_norm": 1.5263303180671814,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 23370
+    },
+    {
+      "epoch": 0.23371,
+      "grad_norm": 1.2438007511814533,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 23371
+    },
+    {
+      "epoch": 0.23372,
+      "grad_norm": 1.395974219603348,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 23372
+    },
+    {
+      "epoch": 0.23373,
+      "grad_norm": 1.138691882387379,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 23373
+    },
+    {
+      "epoch": 0.23374,
+      "grad_norm": 1.3241204866189684,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 23374
+    },
+    {
+      "epoch": 0.23375,
+      "grad_norm": 1.047699732578175,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 23375
+    },
+    {
+      "epoch": 0.23376,
+      "grad_norm": 1.511792313255881,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 23376
+    },
+    {
+      "epoch": 0.23377,
+      "grad_norm": 1.085038314846417,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 23377
+    },
+    {
+      "epoch": 0.23378,
+      "grad_norm": 1.6608656967460116,
+      "learning_rate": 0.003,
+      "loss": 4.0373,
+      "step": 23378
+    },
+    {
+      "epoch": 0.23379,
+      "grad_norm": 1.0835517992591785,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 23379
+    },
+    {
+      "epoch": 0.2338,
+      "grad_norm": 1.312462111621112,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 23380
+    },
+    {
+      "epoch": 0.23381,
+      "grad_norm": 1.4069118340220683,
+      "learning_rate": 0.003,
+      "loss": 4.0289,
+      "step": 23381
+    },
+    {
+      "epoch": 0.23382,
+      "grad_norm": 1.1336329097124942,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 23382
+    },
+    {
+      "epoch": 0.23383,
+      "grad_norm": 1.4551740148980983,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 23383
+    },
+    {
+      "epoch": 0.23384,
+      "grad_norm": 1.2615934342950896,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 23384
+    },
+    {
+      "epoch": 0.23385,
+      "grad_norm": 1.246979981861218,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 23385
+    },
+    {
+      "epoch": 0.23386,
+      "grad_norm": 1.2753867048376397,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 23386
+    },
+    {
+      "epoch": 0.23387,
+      "grad_norm": 1.203081605493527,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 23387
+    },
+    {
+      "epoch": 0.23388,
+      "grad_norm": 1.5974069564512552,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 23388
+    },
+    {
+      "epoch": 0.23389,
+      "grad_norm": 1.237753361789257,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 23389
+    },
+    {
+      "epoch": 0.2339,
+      "grad_norm": 1.209492918087264,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 23390
+    },
+    {
+      "epoch": 0.23391,
+      "grad_norm": 1.2125294279819325,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 23391
+    },
+    {
+      "epoch": 0.23392,
+      "grad_norm": 1.3603518076563226,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 23392
+    },
+    {
+      "epoch": 0.23393,
+      "grad_norm": 1.0484226396017828,
+      "learning_rate": 0.003,
+      "loss": 4.0383,
+      "step": 23393
+    },
+    {
+      "epoch": 0.23394,
+      "grad_norm": 1.3229162112969415,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 23394
+    },
+    {
+      "epoch": 0.23395,
+      "grad_norm": 1.3159922062278249,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 23395
+    },
+    {
+      "epoch": 0.23396,
+      "grad_norm": 1.4022226873578263,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 23396
+    },
+    {
+      "epoch": 0.23397,
+      "grad_norm": 1.2765608847428673,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 23397
+    },
+    {
+      "epoch": 0.23398,
+      "grad_norm": 1.4952698287014101,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 23398
+    },
+    {
+      "epoch": 0.23399,
+      "grad_norm": 0.888259400562517,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 23399
+    },
+    {
+      "epoch": 0.234,
+      "grad_norm": 1.2448009518721466,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 23400
+    },
+    {
+      "epoch": 0.23401,
+      "grad_norm": 1.42363266341178,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 23401
+    },
+    {
+      "epoch": 0.23402,
+      "grad_norm": 1.1423266332064896,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 23402
+    },
+    {
+      "epoch": 0.23403,
+      "grad_norm": 1.507993433628774,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 23403
+    },
+    {
+      "epoch": 0.23404,
+      "grad_norm": 1.107577849813295,
+      "learning_rate": 0.003,
+      "loss": 3.977,
+      "step": 23404
+    },
+    {
+      "epoch": 0.23405,
+      "grad_norm": 1.4026631316679072,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 23405
+    },
+    {
+      "epoch": 0.23406,
+      "grad_norm": 1.2366517915242572,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 23406
+    },
+    {
+      "epoch": 0.23407,
+      "grad_norm": 1.3693301719158344,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 23407
+    },
+    {
+      "epoch": 0.23408,
+      "grad_norm": 1.2733337489420642,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 23408
+    },
+    {
+      "epoch": 0.23409,
+      "grad_norm": 1.3631811120418016,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 23409
+    },
+    {
+      "epoch": 0.2341,
+      "grad_norm": 1.2904716725952443,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 23410
+    },
+    {
+      "epoch": 0.23411,
+      "grad_norm": 1.4275546667328023,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 23411
+    },
+    {
+      "epoch": 0.23412,
+      "grad_norm": 1.1521829690405783,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 23412
+    },
+    {
+      "epoch": 0.23413,
+      "grad_norm": 1.050759431497358,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 23413
+    },
+    {
+      "epoch": 0.23414,
+      "grad_norm": 1.1221328706694165,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 23414
+    },
+    {
+      "epoch": 0.23415,
+      "grad_norm": 1.6616709267222591,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 23415
+    },
+    {
+      "epoch": 0.23416,
+      "grad_norm": 1.0389939645160031,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 23416
+    },
+    {
+      "epoch": 0.23417,
+      "grad_norm": 1.6923811423226225,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 23417
+    },
+    {
+      "epoch": 0.23418,
+      "grad_norm": 1.0843569644305082,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 23418
+    },
+    {
+      "epoch": 0.23419,
+      "grad_norm": 1.3138356365331605,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 23419
+    },
+    {
+      "epoch": 0.2342,
+      "grad_norm": 1.2338398904016028,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 23420
+    },
+    {
+      "epoch": 0.23421,
+      "grad_norm": 1.2391646327897725,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 23421
+    },
+    {
+      "epoch": 0.23422,
+      "grad_norm": 1.2912747074432023,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 23422
+    },
+    {
+      "epoch": 0.23423,
+      "grad_norm": 1.2956915725765183,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 23423
+    },
+    {
+      "epoch": 0.23424,
+      "grad_norm": 1.1090666279121069,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 23424
+    },
+    {
+      "epoch": 0.23425,
+      "grad_norm": 1.2662929644174712,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 23425
+    },
+    {
+      "epoch": 0.23426,
+      "grad_norm": 1.2279586699982927,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 23426
+    },
+    {
+      "epoch": 0.23427,
+      "grad_norm": 1.442091196627373,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 23427
+    },
+    {
+      "epoch": 0.23428,
+      "grad_norm": 1.0655939822982714,
+      "learning_rate": 0.003,
+      "loss": 3.9762,
+      "step": 23428
+    },
+    {
+      "epoch": 0.23429,
+      "grad_norm": 1.7344052657286135,
+      "learning_rate": 0.003,
+      "loss": 4.0302,
+      "step": 23429
+    },
+    {
+      "epoch": 0.2343,
+      "grad_norm": 1.0783075179397525,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 23430
+    },
+    {
+      "epoch": 0.23431,
+      "grad_norm": 1.2923675798169427,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 23431
+    },
+    {
+      "epoch": 0.23432,
+      "grad_norm": 1.6371203012150652,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 23432
+    },
+    {
+      "epoch": 0.23433,
+      "grad_norm": 0.978405423126986,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 23433
+    },
+    {
+      "epoch": 0.23434,
+      "grad_norm": 1.4720015394222241,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 23434
+    },
+    {
+      "epoch": 0.23435,
+      "grad_norm": 1.2395205247561203,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 23435
+    },
+    {
+      "epoch": 0.23436,
+      "grad_norm": 1.2578067905873023,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 23436
+    },
+    {
+      "epoch": 0.23437,
+      "grad_norm": 1.1326996881760658,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 23437
+    },
+    {
+      "epoch": 0.23438,
+      "grad_norm": 1.3145903610986645,
+      "learning_rate": 0.003,
+      "loss": 4.0589,
+      "step": 23438
+    },
+    {
+      "epoch": 0.23439,
+      "grad_norm": 1.4563793142706365,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 23439
+    },
+    {
+      "epoch": 0.2344,
+      "grad_norm": 1.0316023813015853,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 23440
+    },
+    {
+      "epoch": 0.23441,
+      "grad_norm": 1.2936410035659212,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 23441
+    },
+    {
+      "epoch": 0.23442,
+      "grad_norm": 1.173089479025539,
+      "learning_rate": 0.003,
+      "loss": 4.0316,
+      "step": 23442
+    },
+    {
+      "epoch": 0.23443,
+      "grad_norm": 1.3100705038927842,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 23443
+    },
+    {
+      "epoch": 0.23444,
+      "grad_norm": 1.2572297864244522,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 23444
+    },
+    {
+      "epoch": 0.23445,
+      "grad_norm": 1.446601259725187,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 23445
+    },
+    {
+      "epoch": 0.23446,
+      "grad_norm": 1.0771512368256708,
+      "learning_rate": 0.003,
+      "loss": 4.0293,
+      "step": 23446
+    },
+    {
+      "epoch": 0.23447,
+      "grad_norm": 1.4159622460463426,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 23447
+    },
+    {
+      "epoch": 0.23448,
+      "grad_norm": 1.2706008220089537,
+      "learning_rate": 0.003,
+      "loss": 3.978,
+      "step": 23448
+    },
+    {
+      "epoch": 0.23449,
+      "grad_norm": 1.3150830230645458,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 23449
+    },
+    {
+      "epoch": 0.2345,
+      "grad_norm": 1.2885582777870277,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 23450
+    },
+    {
+      "epoch": 0.23451,
+      "grad_norm": 1.4291894705132808,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 23451
+    },
+    {
+      "epoch": 0.23452,
+      "grad_norm": 1.1468613865993973,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 23452
+    },
+    {
+      "epoch": 0.23453,
+      "grad_norm": 1.5236804588959616,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 23453
+    },
+    {
+      "epoch": 0.23454,
+      "grad_norm": 1.0116512827563489,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 23454
+    },
+    {
+      "epoch": 0.23455,
+      "grad_norm": 1.315126494617547,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 23455
+    },
+    {
+      "epoch": 0.23456,
+      "grad_norm": 1.1672360139656088,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 23456
+    },
+    {
+      "epoch": 0.23457,
+      "grad_norm": 1.46846081139708,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 23457
+    },
+    {
+      "epoch": 0.23458,
+      "grad_norm": 1.2958073012524514,
+      "learning_rate": 0.003,
+      "loss": 3.9695,
+      "step": 23458
+    },
+    {
+      "epoch": 0.23459,
+      "grad_norm": 1.1954688340007416,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 23459
+    },
+    {
+      "epoch": 0.2346,
+      "grad_norm": 1.3864336335813092,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 23460
+    },
+    {
+      "epoch": 0.23461,
+      "grad_norm": 1.2270421901816697,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 23461
+    },
+    {
+      "epoch": 0.23462,
+      "grad_norm": 1.3713261555011673,
+      "learning_rate": 0.003,
+      "loss": 4.0219,
+      "step": 23462
+    },
+    {
+      "epoch": 0.23463,
+      "grad_norm": 1.1233198317211937,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 23463
+    },
+    {
+      "epoch": 0.23464,
+      "grad_norm": 1.4376470680795241,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 23464
+    },
+    {
+      "epoch": 0.23465,
+      "grad_norm": 1.2503266210828128,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 23465
+    },
+    {
+      "epoch": 0.23466,
+      "grad_norm": 1.327801092266407,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 23466
+    },
+    {
+      "epoch": 0.23467,
+      "grad_norm": 1.2617592456562003,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 23467
+    },
+    {
+      "epoch": 0.23468,
+      "grad_norm": 1.0173885199884025,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 23468
+    },
+    {
+      "epoch": 0.23469,
+      "grad_norm": 1.5538371204750876,
+      "learning_rate": 0.003,
+      "loss": 3.978,
+      "step": 23469
+    },
+    {
+      "epoch": 0.2347,
+      "grad_norm": 0.9578831593909483,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 23470
+    },
+    {
+      "epoch": 0.23471,
+      "grad_norm": 1.369328625859417,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 23471
+    },
+    {
+      "epoch": 0.23472,
+      "grad_norm": 1.2781582932479487,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 23472
+    },
+    {
+      "epoch": 0.23473,
+      "grad_norm": 1.4747330132720498,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 23473
+    },
+    {
+      "epoch": 0.23474,
+      "grad_norm": 1.1112803336737396,
+      "learning_rate": 0.003,
+      "loss": 4.0136,
+      "step": 23474
+    },
+    {
+      "epoch": 0.23475,
+      "grad_norm": 1.2843651851213491,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 23475
+    },
+    {
+      "epoch": 0.23476,
+      "grad_norm": 1.175774293862044,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 23476
+    },
+    {
+      "epoch": 0.23477,
+      "grad_norm": 1.5141228402132356,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 23477
+    },
+    {
+      "epoch": 0.23478,
+      "grad_norm": 1.3488654198647212,
+      "learning_rate": 0.003,
+      "loss": 4.0349,
+      "step": 23478
+    },
+    {
+      "epoch": 0.23479,
+      "grad_norm": 1.3571493270301889,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 23479
+    },
+    {
+      "epoch": 0.2348,
+      "grad_norm": 1.4529744813833727,
+      "learning_rate": 0.003,
+      "loss": 4.065,
+      "step": 23480
+    },
+    {
+      "epoch": 0.23481,
+      "grad_norm": 1.2056181406529247,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 23481
+    },
+    {
+      "epoch": 0.23482,
+      "grad_norm": 1.12034265199902,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 23482
+    },
+    {
+      "epoch": 0.23483,
+      "grad_norm": 1.226722921705432,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 23483
+    },
+    {
+      "epoch": 0.23484,
+      "grad_norm": 1.1617240631548165,
+      "learning_rate": 0.003,
+      "loss": 3.9775,
+      "step": 23484
+    },
+    {
+      "epoch": 0.23485,
+      "grad_norm": 1.1565071993637739,
+      "learning_rate": 0.003,
+      "loss": 3.9707,
+      "step": 23485
+    },
+    {
+      "epoch": 0.23486,
+      "grad_norm": 1.298967753310141,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 23486
+    },
+    {
+      "epoch": 0.23487,
+      "grad_norm": 1.2743829536048128,
+      "learning_rate": 0.003,
+      "loss": 4.0122,
+      "step": 23487
+    },
+    {
+      "epoch": 0.23488,
+      "grad_norm": 1.4478089304731034,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 23488
+    },
+    {
+      "epoch": 0.23489,
+      "grad_norm": 1.1949496629192733,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 23489
+    },
+    {
+      "epoch": 0.2349,
+      "grad_norm": 1.4527075557983935,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 23490
+    },
+    {
+      "epoch": 0.23491,
+      "grad_norm": 1.237849527353194,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 23491
+    },
+    {
+      "epoch": 0.23492,
+      "grad_norm": 1.2655417202592745,
+      "learning_rate": 0.003,
+      "loss": 3.9611,
+      "step": 23492
+    },
+    {
+      "epoch": 0.23493,
+      "grad_norm": 1.3424357425490099,
+      "learning_rate": 0.003,
+      "loss": 3.9798,
+      "step": 23493
+    },
+    {
+      "epoch": 0.23494,
+      "grad_norm": 1.3460992901535174,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 23494
+    },
+    {
+      "epoch": 0.23495,
+      "grad_norm": 1.466898890226648,
+      "learning_rate": 0.003,
+      "loss": 4.0279,
+      "step": 23495
+    },
+    {
+      "epoch": 0.23496,
+      "grad_norm": 1.2730896235476457,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 23496
+    },
+    {
+      "epoch": 0.23497,
+      "grad_norm": 1.2832835292627764,
+      "learning_rate": 0.003,
+      "loss": 3.9763,
+      "step": 23497
+    },
+    {
+      "epoch": 0.23498,
+      "grad_norm": 1.3712878713704353,
+      "learning_rate": 0.003,
+      "loss": 3.9824,
+      "step": 23498
+    },
+    {
+      "epoch": 0.23499,
+      "grad_norm": 1.2619450139218464,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 23499
+    },
+    {
+      "epoch": 0.235,
+      "grad_norm": 1.2970982619172773,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 23500
+    },
+    {
+      "epoch": 0.23501,
+      "grad_norm": 1.2801829202090207,
+      "learning_rate": 0.003,
+      "loss": 3.9792,
+      "step": 23501
+    },
+    {
+      "epoch": 0.23502,
+      "grad_norm": 1.3745039149258296,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 23502
+    },
+    {
+      "epoch": 0.23503,
+      "grad_norm": 1.138420627225116,
+      "learning_rate": 0.003,
+      "loss": 3.9779,
+      "step": 23503
+    },
+    {
+      "epoch": 0.23504,
+      "grad_norm": 1.29587137977808,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 23504
+    },
+    {
+      "epoch": 0.23505,
+      "grad_norm": 1.152855220557722,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 23505
+    },
+    {
+      "epoch": 0.23506,
+      "grad_norm": 1.266865283511403,
+      "learning_rate": 0.003,
+      "loss": 3.9798,
+      "step": 23506
+    },
+    {
+      "epoch": 0.23507,
+      "grad_norm": 1.2986025137051715,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 23507
+    },
+    {
+      "epoch": 0.23508,
+      "grad_norm": 1.2743954869929413,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 23508
+    },
+    {
+      "epoch": 0.23509,
+      "grad_norm": 1.2742478750722823,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 23509
+    },
+    {
+      "epoch": 0.2351,
+      "grad_norm": 1.148664972789704,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 23510
+    },
+    {
+      "epoch": 0.23511,
+      "grad_norm": 1.402782050698913,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 23511
+    },
+    {
+      "epoch": 0.23512,
+      "grad_norm": 1.1550009171976936,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 23512
+    },
+    {
+      "epoch": 0.23513,
+      "grad_norm": 1.1864618698473834,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 23513
+    },
+    {
+      "epoch": 0.23514,
+      "grad_norm": 1.165123805928825,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 23514
+    },
+    {
+      "epoch": 0.23515,
+      "grad_norm": 1.305209956442247,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 23515
+    },
+    {
+      "epoch": 0.23516,
+      "grad_norm": 1.2905885578225258,
+      "learning_rate": 0.003,
+      "loss": 3.9844,
+      "step": 23516
+    },
+    {
+      "epoch": 0.23517,
+      "grad_norm": 1.3838617840380292,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 23517
+    },
+    {
+      "epoch": 0.23518,
+      "grad_norm": 1.4920633741718672,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 23518
+    },
+    {
+      "epoch": 0.23519,
+      "grad_norm": 1.0860244281878548,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 23519
+    },
+    {
+      "epoch": 0.2352,
+      "grad_norm": 1.4171266383791647,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 23520
+    },
+    {
+      "epoch": 0.23521,
+      "grad_norm": 1.2765685037608308,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 23521
+    },
+    {
+      "epoch": 0.23522,
+      "grad_norm": 1.198200434312808,
+      "learning_rate": 0.003,
+      "loss": 3.9779,
+      "step": 23522
+    },
+    {
+      "epoch": 0.23523,
+      "grad_norm": 1.565102249335088,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 23523
+    },
+    {
+      "epoch": 0.23524,
+      "grad_norm": 1.1937045748137083,
+      "learning_rate": 0.003,
+      "loss": 4.0547,
+      "step": 23524
+    },
+    {
+      "epoch": 0.23525,
+      "grad_norm": 1.2727117069667613,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 23525
+    },
+    {
+      "epoch": 0.23526,
+      "grad_norm": 1.2348346337856788,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 23526
+    },
+    {
+      "epoch": 0.23527,
+      "grad_norm": 1.2927555942855364,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 23527
+    },
+    {
+      "epoch": 0.23528,
+      "grad_norm": 1.0578622404633207,
+      "learning_rate": 0.003,
+      "loss": 3.9685,
+      "step": 23528
+    },
+    {
+      "epoch": 0.23529,
+      "grad_norm": 1.4680663316831282,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 23529
+    },
+    {
+      "epoch": 0.2353,
+      "grad_norm": 1.295724808322909,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 23530
+    },
+    {
+      "epoch": 0.23531,
+      "grad_norm": 1.2279491441962092,
+      "learning_rate": 0.003,
+      "loss": 3.951,
+      "step": 23531
+    },
+    {
+      "epoch": 0.23532,
+      "grad_norm": 1.1588926539342554,
+      "learning_rate": 0.003,
+      "loss": 4.0351,
+      "step": 23532
+    },
+    {
+      "epoch": 0.23533,
+      "grad_norm": 1.3873326048228016,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 23533
+    },
+    {
+      "epoch": 0.23534,
+      "grad_norm": 1.202072562177967,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 23534
+    },
+    {
+      "epoch": 0.23535,
+      "grad_norm": 1.2930076412816465,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 23535
+    },
+    {
+      "epoch": 0.23536,
+      "grad_norm": 1.381454558468072,
+      "learning_rate": 0.003,
+      "loss": 3.9826,
+      "step": 23536
+    },
+    {
+      "epoch": 0.23537,
+      "grad_norm": 1.208635463297595,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 23537
+    },
+    {
+      "epoch": 0.23538,
+      "grad_norm": 1.466219300350616,
+      "learning_rate": 0.003,
+      "loss": 3.9776,
+      "step": 23538
+    },
+    {
+      "epoch": 0.23539,
+      "grad_norm": 1.1394413914558932,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 23539
+    },
+    {
+      "epoch": 0.2354,
+      "grad_norm": 1.5608338019244141,
+      "learning_rate": 0.003,
+      "loss": 4.0292,
+      "step": 23540
+    },
+    {
+      "epoch": 0.23541,
+      "grad_norm": 1.105746898748603,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 23541
+    },
+    {
+      "epoch": 0.23542,
+      "grad_norm": 1.400743199137655,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 23542
+    },
+    {
+      "epoch": 0.23543,
+      "grad_norm": 1.1943940083832705,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 23543
+    },
+    {
+      "epoch": 0.23544,
+      "grad_norm": 1.3610096055293204,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 23544
+    },
+    {
+      "epoch": 0.23545,
+      "grad_norm": 1.2020283236368572,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 23545
+    },
+    {
+      "epoch": 0.23546,
+      "grad_norm": 1.3560000644032266,
+      "learning_rate": 0.003,
+      "loss": 3.9809,
+      "step": 23546
+    },
+    {
+      "epoch": 0.23547,
+      "grad_norm": 1.2188970252643807,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 23547
+    },
+    {
+      "epoch": 0.23548,
+      "grad_norm": 1.2140175836285423,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 23548
+    },
+    {
+      "epoch": 0.23549,
+      "grad_norm": 1.301616829486912,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 23549
+    },
+    {
+      "epoch": 0.2355,
+      "grad_norm": 1.2715570223734547,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 23550
+    },
+    {
+      "epoch": 0.23551,
+      "grad_norm": 1.231856246951655,
+      "learning_rate": 0.003,
+      "loss": 3.9847,
+      "step": 23551
+    },
+    {
+      "epoch": 0.23552,
+      "grad_norm": 1.2423485771410525,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 23552
+    },
+    {
+      "epoch": 0.23553,
+      "grad_norm": 1.4344182757523662,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 23553
+    },
+    {
+      "epoch": 0.23554,
+      "grad_norm": 1.3782923340544269,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 23554
+    },
+    {
+      "epoch": 0.23555,
+      "grad_norm": 1.5499522427711891,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 23555
+    },
+    {
+      "epoch": 0.23556,
+      "grad_norm": 1.1888994091250291,
+      "learning_rate": 0.003,
+      "loss": 3.9752,
+      "step": 23556
+    },
+    {
+      "epoch": 0.23557,
+      "grad_norm": 1.2826770283253905,
+      "learning_rate": 0.003,
+      "loss": 3.9735,
+      "step": 23557
+    },
+    {
+      "epoch": 0.23558,
+      "grad_norm": 1.4078302040753254,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 23558
+    },
+    {
+      "epoch": 0.23559,
+      "grad_norm": 1.1809841503042717,
+      "learning_rate": 0.003,
+      "loss": 3.9741,
+      "step": 23559
+    },
+    {
+      "epoch": 0.2356,
+      "grad_norm": 1.2503671411891066,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 23560
+    },
+    {
+      "epoch": 0.23561,
+      "grad_norm": 1.3412203447825635,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 23561
+    },
+    {
+      "epoch": 0.23562,
+      "grad_norm": 1.2093906684628304,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 23562
+    },
+    {
+      "epoch": 0.23563,
+      "grad_norm": 1.1852896791304326,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 23563
+    },
+    {
+      "epoch": 0.23564,
+      "grad_norm": 1.3383912130373163,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 23564
+    },
+    {
+      "epoch": 0.23565,
+      "grad_norm": 1.170069351946885,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 23565
+    },
+    {
+      "epoch": 0.23566,
+      "grad_norm": 1.242835101270412,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 23566
+    },
+    {
+      "epoch": 0.23567,
+      "grad_norm": 1.221558992866097,
+      "learning_rate": 0.003,
+      "loss": 3.9985,
+      "step": 23567
+    },
+    {
+      "epoch": 0.23568,
+      "grad_norm": 1.3879842199460382,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 23568
+    },
+    {
+      "epoch": 0.23569,
+      "grad_norm": 1.3701079459192598,
+      "learning_rate": 0.003,
+      "loss": 4.0233,
+      "step": 23569
+    },
+    {
+      "epoch": 0.2357,
+      "grad_norm": 1.268009403281253,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 23570
+    },
+    {
+      "epoch": 0.23571,
+      "grad_norm": 1.1932230149414695,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 23571
+    },
+    {
+      "epoch": 0.23572,
+      "grad_norm": 1.3880481431939806,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 23572
+    },
+    {
+      "epoch": 0.23573,
+      "grad_norm": 1.1422633570919594,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 23573
+    },
+    {
+      "epoch": 0.23574,
+      "grad_norm": 1.6014319824287055,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 23574
+    },
+    {
+      "epoch": 0.23575,
+      "grad_norm": 1.050658438413793,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 23575
+    },
+    {
+      "epoch": 0.23576,
+      "grad_norm": 1.6095175048452013,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 23576
+    },
+    {
+      "epoch": 0.23577,
+      "grad_norm": 0.9847611052825685,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 23577
+    },
+    {
+      "epoch": 0.23578,
+      "grad_norm": 1.5997776164990412,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 23578
+    },
+    {
+      "epoch": 0.23579,
+      "grad_norm": 1.4226494394347153,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 23579
+    },
+    {
+      "epoch": 0.2358,
+      "grad_norm": 1.184292505277545,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 23580
+    },
+    {
+      "epoch": 0.23581,
+      "grad_norm": 1.2991760997880477,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 23581
+    },
+    {
+      "epoch": 0.23582,
+      "grad_norm": 1.229008172339804,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 23582
+    },
+    {
+      "epoch": 0.23583,
+      "grad_norm": 1.2182651359808503,
+      "learning_rate": 0.003,
+      "loss": 3.9749,
+      "step": 23583
+    },
+    {
+      "epoch": 0.23584,
+      "grad_norm": 1.287751488314706,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 23584
+    },
+    {
+      "epoch": 0.23585,
+      "grad_norm": 1.3841987129833222,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 23585
+    },
+    {
+      "epoch": 0.23586,
+      "grad_norm": 1.153429774359565,
+      "learning_rate": 0.003,
+      "loss": 3.9844,
+      "step": 23586
+    },
+    {
+      "epoch": 0.23587,
+      "grad_norm": 1.1811996954470134,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 23587
+    },
+    {
+      "epoch": 0.23588,
+      "grad_norm": 1.3099570721000937,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 23588
+    },
+    {
+      "epoch": 0.23589,
+      "grad_norm": 1.2919592481594284,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 23589
+    },
+    {
+      "epoch": 0.2359,
+      "grad_norm": 1.21369254367547,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 23590
+    },
+    {
+      "epoch": 0.23591,
+      "grad_norm": 1.0823840183388644,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 23591
+    },
+    {
+      "epoch": 0.23592,
+      "grad_norm": 1.3799578942188362,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 23592
+    },
+    {
+      "epoch": 0.23593,
+      "grad_norm": 1.258162229796661,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 23593
+    },
+    {
+      "epoch": 0.23594,
+      "grad_norm": 1.304546249051914,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 23594
+    },
+    {
+      "epoch": 0.23595,
+      "grad_norm": 1.2086315823734175,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 23595
+    },
+    {
+      "epoch": 0.23596,
+      "grad_norm": 1.33976591241992,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 23596
+    },
+    {
+      "epoch": 0.23597,
+      "grad_norm": 1.2355185056288922,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 23597
+    },
+    {
+      "epoch": 0.23598,
+      "grad_norm": 1.2408510604638432,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 23598
+    },
+    {
+      "epoch": 0.23599,
+      "grad_norm": 1.366833445203147,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 23599
+    },
+    {
+      "epoch": 0.236,
+      "grad_norm": 1.1816102008066853,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 23600
+    },
+    {
+      "epoch": 0.23601,
+      "grad_norm": 1.2715604974360597,
+      "learning_rate": 0.003,
+      "loss": 3.9816,
+      "step": 23601
+    },
+    {
+      "epoch": 0.23602,
+      "grad_norm": 1.101074374290445,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 23602
+    },
+    {
+      "epoch": 0.23603,
+      "grad_norm": 1.414873012844617,
+      "learning_rate": 0.003,
+      "loss": 4.0334,
+      "step": 23603
+    },
+    {
+      "epoch": 0.23604,
+      "grad_norm": 1.0123564485359815,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 23604
+    },
+    {
+      "epoch": 0.23605,
+      "grad_norm": 1.4487131295290725,
+      "learning_rate": 0.003,
+      "loss": 4.0236,
+      "step": 23605
+    },
+    {
+      "epoch": 0.23606,
+      "grad_norm": 1.0771694180004108,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 23606
+    },
+    {
+      "epoch": 0.23607,
+      "grad_norm": 1.2885099151421044,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 23607
+    },
+    {
+      "epoch": 0.23608,
+      "grad_norm": 1.3765862797850723,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 23608
+    },
+    {
+      "epoch": 0.23609,
+      "grad_norm": 1.1931152906126652,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 23609
+    },
+    {
+      "epoch": 0.2361,
+      "grad_norm": 1.5586473645392018,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 23610
+    },
+    {
+      "epoch": 0.23611,
+      "grad_norm": 0.970983667649775,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 23611
+    },
+    {
+      "epoch": 0.23612,
+      "grad_norm": 1.329207270267995,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 23612
+    },
+    {
+      "epoch": 0.23613,
+      "grad_norm": 1.4448878950842352,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 23613
+    },
+    {
+      "epoch": 0.23614,
+      "grad_norm": 1.2871309565410285,
+      "learning_rate": 0.003,
+      "loss": 3.9844,
+      "step": 23614
+    },
+    {
+      "epoch": 0.23615,
+      "grad_norm": 1.3301854283117551,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 23615
+    },
+    {
+      "epoch": 0.23616,
+      "grad_norm": 1.2106843046972136,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 23616
+    },
+    {
+      "epoch": 0.23617,
+      "grad_norm": 1.3545838037987,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 23617
+    },
+    {
+      "epoch": 0.23618,
+      "grad_norm": 1.3373766077139857,
+      "learning_rate": 0.003,
+      "loss": 3.9755,
+      "step": 23618
+    },
+    {
+      "epoch": 0.23619,
+      "grad_norm": 1.2563753946844696,
+      "learning_rate": 0.003,
+      "loss": 3.9821,
+      "step": 23619
+    },
+    {
+      "epoch": 0.2362,
+      "grad_norm": 1.3629722608360904,
+      "learning_rate": 0.003,
+      "loss": 3.9995,
+      "step": 23620
+    },
+    {
+      "epoch": 0.23621,
+      "grad_norm": 1.18615688855962,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 23621
+    },
+    {
+      "epoch": 0.23622,
+      "grad_norm": 1.2335359596489806,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 23622
+    },
+    {
+      "epoch": 0.23623,
+      "grad_norm": 1.202163352908708,
+      "learning_rate": 0.003,
+      "loss": 3.9648,
+      "step": 23623
+    },
+    {
+      "epoch": 0.23624,
+      "grad_norm": 1.3289248626711947,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 23624
+    },
+    {
+      "epoch": 0.23625,
+      "grad_norm": 1.3060688794769761,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 23625
+    },
+    {
+      "epoch": 0.23626,
+      "grad_norm": 1.1385584404381532,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 23626
+    },
+    {
+      "epoch": 0.23627,
+      "grad_norm": 1.635658112910564,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 23627
+    },
+    {
+      "epoch": 0.23628,
+      "grad_norm": 1.066434806348444,
+      "learning_rate": 0.003,
+      "loss": 3.9789,
+      "step": 23628
+    },
+    {
+      "epoch": 0.23629,
+      "grad_norm": 1.5119556087433956,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 23629
+    },
+    {
+      "epoch": 0.2363,
+      "grad_norm": 1.0286589350593327,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 23630
+    },
+    {
+      "epoch": 0.23631,
+      "grad_norm": 1.381457050375695,
+      "learning_rate": 0.003,
+      "loss": 3.9785,
+      "step": 23631
+    },
+    {
+      "epoch": 0.23632,
+      "grad_norm": 1.2059538660776128,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 23632
+    },
+    {
+      "epoch": 0.23633,
+      "grad_norm": 1.252400678704544,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 23633
+    },
+    {
+      "epoch": 0.23634,
+      "grad_norm": 1.1659299886740584,
+      "learning_rate": 0.003,
+      "loss": 4.0413,
+      "step": 23634
+    },
+    {
+      "epoch": 0.23635,
+      "grad_norm": 1.360012191324688,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 23635
+    },
+    {
+      "epoch": 0.23636,
+      "grad_norm": 1.1096716667807438,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 23636
+    },
+    {
+      "epoch": 0.23637,
+      "grad_norm": 1.4185656476316302,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 23637
+    },
+    {
+      "epoch": 0.23638,
+      "grad_norm": 1.0128078194865069,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 23638
+    },
+    {
+      "epoch": 0.23639,
+      "grad_norm": 1.626658025938144,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 23639
+    },
+    {
+      "epoch": 0.2364,
+      "grad_norm": 1.1020519323111801,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 23640
+    },
+    {
+      "epoch": 0.23641,
+      "grad_norm": 1.718645455375407,
+      "learning_rate": 0.003,
+      "loss": 3.9938,
+      "step": 23641
+    },
+    {
+      "epoch": 0.23642,
+      "grad_norm": 1.0133090918971492,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 23642
+    },
+    {
+      "epoch": 0.23643,
+      "grad_norm": 1.3479548544943352,
+      "learning_rate": 0.003,
+      "loss": 4.0454,
+      "step": 23643
+    },
+    {
+      "epoch": 0.23644,
+      "grad_norm": 1.2392564240430874,
+      "learning_rate": 0.003,
+      "loss": 4.04,
+      "step": 23644
+    },
+    {
+      "epoch": 0.23645,
+      "grad_norm": 1.1739574005366367,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 23645
+    },
+    {
+      "epoch": 0.23646,
+      "grad_norm": 1.2907658074955457,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 23646
+    },
+    {
+      "epoch": 0.23647,
+      "grad_norm": 1.5875991538772,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 23647
+    },
+    {
+      "epoch": 0.23648,
+      "grad_norm": 1.046158686680645,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 23648
+    },
+    {
+      "epoch": 0.23649,
+      "grad_norm": 1.5361621962277203,
+      "learning_rate": 0.003,
+      "loss": 4.0222,
+      "step": 23649
+    },
+    {
+      "epoch": 0.2365,
+      "grad_norm": 1.0071093254701702,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 23650
+    },
+    {
+      "epoch": 0.23651,
+      "grad_norm": 1.2756940458753911,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 23651
+    },
+    {
+      "epoch": 0.23652,
+      "grad_norm": 1.2433308045521188,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 23652
+    },
+    {
+      "epoch": 0.23653,
+      "grad_norm": 1.243879305170937,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 23653
+    },
+    {
+      "epoch": 0.23654,
+      "grad_norm": 1.3163085223735167,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 23654
+    },
+    {
+      "epoch": 0.23655,
+      "grad_norm": 1.1646714303935588,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 23655
+    },
+    {
+      "epoch": 0.23656,
+      "grad_norm": 1.5163143616414805,
+      "learning_rate": 0.003,
+      "loss": 3.9922,
+      "step": 23656
+    },
+    {
+      "epoch": 0.23657,
+      "grad_norm": 1.44866990984894,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 23657
+    },
+    {
+      "epoch": 0.23658,
+      "grad_norm": 1.3150513603894807,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 23658
+    },
+    {
+      "epoch": 0.23659,
+      "grad_norm": 1.4226349672380159,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 23659
+    },
+    {
+      "epoch": 0.2366,
+      "grad_norm": 1.1980675480819953,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 23660
+    },
+    {
+      "epoch": 0.23661,
+      "grad_norm": 1.426398223955001,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 23661
+    },
+    {
+      "epoch": 0.23662,
+      "grad_norm": 1.2646535201667544,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 23662
+    },
+    {
+      "epoch": 0.23663,
+      "grad_norm": 1.2016790650271911,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 23663
+    },
+    {
+      "epoch": 0.23664,
+      "grad_norm": 1.217356639477127,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 23664
+    },
+    {
+      "epoch": 0.23665,
+      "grad_norm": 1.1336560088489491,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 23665
+    },
+    {
+      "epoch": 0.23666,
+      "grad_norm": 1.4735866239698967,
+      "learning_rate": 0.003,
+      "loss": 4.0642,
+      "step": 23666
+    },
+    {
+      "epoch": 0.23667,
+      "grad_norm": 1.0927304445082435,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 23667
+    },
+    {
+      "epoch": 0.23668,
+      "grad_norm": 1.5020779460058329,
+      "learning_rate": 0.003,
+      "loss": 3.9812,
+      "step": 23668
+    },
+    {
+      "epoch": 0.23669,
+      "grad_norm": 1.0247477436691692,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 23669
+    },
+    {
+      "epoch": 0.2367,
+      "grad_norm": 1.3066497947277553,
+      "learning_rate": 0.003,
+      "loss": 3.9769,
+      "step": 23670
+    },
+    {
+      "epoch": 0.23671,
+      "grad_norm": 1.357732484044677,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 23671
+    },
+    {
+      "epoch": 0.23672,
+      "grad_norm": 1.242601839848695,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 23672
+    },
+    {
+      "epoch": 0.23673,
+      "grad_norm": 1.1613079799733756,
+      "learning_rate": 0.003,
+      "loss": 3.9735,
+      "step": 23673
+    },
+    {
+      "epoch": 0.23674,
+      "grad_norm": 1.4154536526116936,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 23674
+    },
+    {
+      "epoch": 0.23675,
+      "grad_norm": 1.202972288784697,
+      "learning_rate": 0.003,
+      "loss": 3.9936,
+      "step": 23675
+    },
+    {
+      "epoch": 0.23676,
+      "grad_norm": 1.274889077606745,
+      "learning_rate": 0.003,
+      "loss": 4.0366,
+      "step": 23676
+    },
+    {
+      "epoch": 0.23677,
+      "grad_norm": 1.1885947660571277,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 23677
+    },
+    {
+      "epoch": 0.23678,
+      "grad_norm": 1.174970086448457,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 23678
+    },
+    {
+      "epoch": 0.23679,
+      "grad_norm": 1.580799669111463,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 23679
+    },
+    {
+      "epoch": 0.2368,
+      "grad_norm": 1.2173769089537902,
+      "learning_rate": 0.003,
+      "loss": 3.9619,
+      "step": 23680
+    },
+    {
+      "epoch": 0.23681,
+      "grad_norm": 1.128301483318101,
+      "learning_rate": 0.003,
+      "loss": 3.9784,
+      "step": 23681
+    },
+    {
+      "epoch": 0.23682,
+      "grad_norm": 1.3808871612074007,
+      "learning_rate": 0.003,
+      "loss": 3.977,
+      "step": 23682
+    },
+    {
+      "epoch": 0.23683,
+      "grad_norm": 1.0611221296664095,
+      "learning_rate": 0.003,
+      "loss": 4.0166,
+      "step": 23683
+    },
+    {
+      "epoch": 0.23684,
+      "grad_norm": 1.6442762448254604,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 23684
+    },
+    {
+      "epoch": 0.23685,
+      "grad_norm": 1.1073843437347886,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 23685
+    },
+    {
+      "epoch": 0.23686,
+      "grad_norm": 1.2909204065658249,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 23686
+    },
+    {
+      "epoch": 0.23687,
+      "grad_norm": 1.4935129290562437,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 23687
+    },
+    {
+      "epoch": 0.23688,
+      "grad_norm": 1.2048684156514042,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 23688
+    },
+    {
+      "epoch": 0.23689,
+      "grad_norm": 1.4783876636759827,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 23689
+    },
+    {
+      "epoch": 0.2369,
+      "grad_norm": 1.1023167164548204,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 23690
+    },
+    {
+      "epoch": 0.23691,
+      "grad_norm": 1.3119558799116933,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 23691
+    },
+    {
+      "epoch": 0.23692,
+      "grad_norm": 1.3372228085379114,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 23692
+    },
+    {
+      "epoch": 0.23693,
+      "grad_norm": 1.3238636053728932,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 23693
+    },
+    {
+      "epoch": 0.23694,
+      "grad_norm": 1.2674425012116501,
+      "learning_rate": 0.003,
+      "loss": 3.9655,
+      "step": 23694
+    },
+    {
+      "epoch": 0.23695,
+      "grad_norm": 1.10687452091599,
+      "learning_rate": 0.003,
+      "loss": 4.0105,
+      "step": 23695
+    },
+    {
+      "epoch": 0.23696,
+      "grad_norm": 1.3435857871803956,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 23696
+    },
+    {
+      "epoch": 0.23697,
+      "grad_norm": 0.9186262720445029,
+      "learning_rate": 0.003,
+      "loss": 3.9813,
+      "step": 23697
+    },
+    {
+      "epoch": 0.23698,
+      "grad_norm": 1.2398654771802047,
+      "learning_rate": 0.003,
+      "loss": 3.9773,
+      "step": 23698
+    },
+    {
+      "epoch": 0.23699,
+      "grad_norm": 1.3481900148618633,
+      "learning_rate": 0.003,
+      "loss": 3.9768,
+      "step": 23699
+    },
+    {
+      "epoch": 0.237,
+      "grad_norm": 1.2357116250178362,
+      "learning_rate": 0.003,
+      "loss": 3.9826,
+      "step": 23700
+    },
+    {
+      "epoch": 0.23701,
+      "grad_norm": 1.4411010033965794,
+      "learning_rate": 0.003,
+      "loss": 3.9753,
+      "step": 23701
+    },
+    {
+      "epoch": 0.23702,
+      "grad_norm": 1.2601890394047148,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 23702
+    },
+    {
+      "epoch": 0.23703,
+      "grad_norm": 1.228539801139052,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 23703
+    },
+    {
+      "epoch": 0.23704,
+      "grad_norm": 1.3628835054272948,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 23704
+    },
+    {
+      "epoch": 0.23705,
+      "grad_norm": 1.0503301202695428,
+      "learning_rate": 0.003,
+      "loss": 3.9706,
+      "step": 23705
+    },
+    {
+      "epoch": 0.23706,
+      "grad_norm": 1.4096427160245864,
+      "learning_rate": 0.003,
+      "loss": 3.975,
+      "step": 23706
+    },
+    {
+      "epoch": 0.23707,
+      "grad_norm": 1.11449172418957,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 23707
+    },
+    {
+      "epoch": 0.23708,
+      "grad_norm": 1.5832580080155554,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 23708
+    },
+    {
+      "epoch": 0.23709,
+      "grad_norm": 1.1576926769980513,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 23709
+    },
+    {
+      "epoch": 0.2371,
+      "grad_norm": 1.1957231931865127,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 23710
+    },
+    {
+      "epoch": 0.23711,
+      "grad_norm": 1.1520072939793973,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 23711
+    },
+    {
+      "epoch": 0.23712,
+      "grad_norm": 1.4473245844192775,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 23712
+    },
+    {
+      "epoch": 0.23713,
+      "grad_norm": 1.1892912607227752,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 23713
+    },
+    {
+      "epoch": 0.23714,
+      "grad_norm": 1.4162118020325232,
+      "learning_rate": 0.003,
+      "loss": 4.0232,
+      "step": 23714
+    },
+    {
+      "epoch": 0.23715,
+      "grad_norm": 1.110121381998269,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 23715
+    },
+    {
+      "epoch": 0.23716,
+      "grad_norm": 1.5712124900502407,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 23716
+    },
+    {
+      "epoch": 0.23717,
+      "grad_norm": 1.0798039208247672,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 23717
+    },
+    {
+      "epoch": 0.23718,
+      "grad_norm": 1.4352342683708683,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 23718
+    },
+    {
+      "epoch": 0.23719,
+      "grad_norm": 1.3165661125333052,
+      "learning_rate": 0.003,
+      "loss": 4.0246,
+      "step": 23719
+    },
+    {
+      "epoch": 0.2372,
+      "grad_norm": 1.3242524977249703,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 23720
+    },
+    {
+      "epoch": 0.23721,
+      "grad_norm": 1.1458447895240702,
+      "learning_rate": 0.003,
+      "loss": 3.9702,
+      "step": 23721
+    },
+    {
+      "epoch": 0.23722,
+      "grad_norm": 1.268491547002357,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 23722
+    },
+    {
+      "epoch": 0.23723,
+      "grad_norm": 1.495941099877952,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 23723
+    },
+    {
+      "epoch": 0.23724,
+      "grad_norm": 1.1479542365407407,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 23724
+    },
+    {
+      "epoch": 0.23725,
+      "grad_norm": 1.272452687494301,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 23725
+    },
+    {
+      "epoch": 0.23726,
+      "grad_norm": 1.1530177078355586,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 23726
+    },
+    {
+      "epoch": 0.23727,
+      "grad_norm": 1.2602756521656502,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 23727
+    },
+    {
+      "epoch": 0.23728,
+      "grad_norm": 1.2840905013797947,
+      "learning_rate": 0.003,
+      "loss": 3.9591,
+      "step": 23728
+    },
+    {
+      "epoch": 0.23729,
+      "grad_norm": 1.075419417616779,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 23729
+    },
+    {
+      "epoch": 0.2373,
+      "grad_norm": 1.4609867705642152,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 23730
+    },
+    {
+      "epoch": 0.23731,
+      "grad_norm": 1.2119872638825755,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 23731
+    },
+    {
+      "epoch": 0.23732,
+      "grad_norm": 1.145973530333096,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 23732
+    },
+    {
+      "epoch": 0.23733,
+      "grad_norm": 1.2225565091249917,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 23733
+    },
+    {
+      "epoch": 0.23734,
+      "grad_norm": 1.390671282648059,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 23734
+    },
+    {
+      "epoch": 0.23735,
+      "grad_norm": 1.1887245992946691,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 23735
+    },
+    {
+      "epoch": 0.23736,
+      "grad_norm": 1.3833448200461425,
+      "learning_rate": 0.003,
+      "loss": 3.9764,
+      "step": 23736
+    },
+    {
+      "epoch": 0.23737,
+      "grad_norm": 1.219908025042158,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 23737
+    },
+    {
+      "epoch": 0.23738,
+      "grad_norm": 1.4566317652728162,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 23738
+    },
+    {
+      "epoch": 0.23739,
+      "grad_norm": 1.1988745437781103,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 23739
+    },
+    {
+      "epoch": 0.2374,
+      "grad_norm": 1.3085047479971064,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 23740
+    },
+    {
+      "epoch": 0.23741,
+      "grad_norm": 1.0416882987696887,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 23741
+    },
+    {
+      "epoch": 0.23742,
+      "grad_norm": 1.5426639929210149,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 23742
+    },
+    {
+      "epoch": 0.23743,
+      "grad_norm": 1.295427908125432,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 23743
+    },
+    {
+      "epoch": 0.23744,
+      "grad_norm": 0.9977579760344626,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 23744
+    },
+    {
+      "epoch": 0.23745,
+      "grad_norm": 1.5494190508884615,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 23745
+    },
+    {
+      "epoch": 0.23746,
+      "grad_norm": 1.187878771829957,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 23746
+    },
+    {
+      "epoch": 0.23747,
+      "grad_norm": 1.5040425617138797,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 23747
+    },
+    {
+      "epoch": 0.23748,
+      "grad_norm": 1.2430370496507221,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 23748
+    },
+    {
+      "epoch": 0.23749,
+      "grad_norm": 1.184008163521529,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 23749
+    },
+    {
+      "epoch": 0.2375,
+      "grad_norm": 1.2510561565475387,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 23750
+    },
+    {
+      "epoch": 0.23751,
+      "grad_norm": 1.3686692299532852,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 23751
+    },
+    {
+      "epoch": 0.23752,
+      "grad_norm": 1.3160634989335436,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 23752
+    },
+    {
+      "epoch": 0.23753,
+      "grad_norm": 1.2892575521953087,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 23753
+    },
+    {
+      "epoch": 0.23754,
+      "grad_norm": 1.118409116178597,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 23754
+    },
+    {
+      "epoch": 0.23755,
+      "grad_norm": 1.2339645136033683,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 23755
+    },
+    {
+      "epoch": 0.23756,
+      "grad_norm": 1.1542323963428938,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 23756
+    },
+    {
+      "epoch": 0.23757,
+      "grad_norm": 1.1681717965603868,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 23757
+    },
+    {
+      "epoch": 0.23758,
+      "grad_norm": 1.2938551169476007,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 23758
+    },
+    {
+      "epoch": 0.23759,
+      "grad_norm": 1.0704216970014842,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 23759
+    },
+    {
+      "epoch": 0.2376,
+      "grad_norm": 1.5550701351800087,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 23760
+    },
+    {
+      "epoch": 0.23761,
+      "grad_norm": 1.078228297794741,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 23761
+    },
+    {
+      "epoch": 0.23762,
+      "grad_norm": 1.3558740311877582,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 23762
+    },
+    {
+      "epoch": 0.23763,
+      "grad_norm": 1.149500007486366,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 23763
+    },
+    {
+      "epoch": 0.23764,
+      "grad_norm": 1.2603984862959847,
+      "learning_rate": 0.003,
+      "loss": 3.967,
+      "step": 23764
+    },
+    {
+      "epoch": 0.23765,
+      "grad_norm": 1.4941055287247673,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 23765
+    },
+    {
+      "epoch": 0.23766,
+      "grad_norm": 1.1883679696427059,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 23766
+    },
+    {
+      "epoch": 0.23767,
+      "grad_norm": 1.3765766349667532,
+      "learning_rate": 0.003,
+      "loss": 3.9776,
+      "step": 23767
+    },
+    {
+      "epoch": 0.23768,
+      "grad_norm": 1.3818787387083442,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 23768
+    },
+    {
+      "epoch": 0.23769,
+      "grad_norm": 1.7070657895566905,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 23769
+    },
+    {
+      "epoch": 0.2377,
+      "grad_norm": 0.9463645347705723,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 23770
+    },
+    {
+      "epoch": 0.23771,
+      "grad_norm": 1.4141831680917243,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 23771
+    },
+    {
+      "epoch": 0.23772,
+      "grad_norm": 1.23715932840379,
+      "learning_rate": 0.003,
+      "loss": 3.982,
+      "step": 23772
+    },
+    {
+      "epoch": 0.23773,
+      "grad_norm": 1.4592866102139086,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 23773
+    },
+    {
+      "epoch": 0.23774,
+      "grad_norm": 1.139427616994017,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 23774
+    },
+    {
+      "epoch": 0.23775,
+      "grad_norm": 1.5873758961117734,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 23775
+    },
+    {
+      "epoch": 0.23776,
+      "grad_norm": 1.0762739564606911,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 23776
+    },
+    {
+      "epoch": 0.23777,
+      "grad_norm": 1.3301250947333254,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 23777
+    },
+    {
+      "epoch": 0.23778,
+      "grad_norm": 1.2240745774613127,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 23778
+    },
+    {
+      "epoch": 0.23779,
+      "grad_norm": 1.3453697769770225,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 23779
+    },
+    {
+      "epoch": 0.2378,
+      "grad_norm": 1.2850159619382935,
+      "learning_rate": 0.003,
+      "loss": 3.9578,
+      "step": 23780
+    },
+    {
+      "epoch": 0.23781,
+      "grad_norm": 1.2762998057649435,
+      "learning_rate": 0.003,
+      "loss": 4.0207,
+      "step": 23781
+    },
+    {
+      "epoch": 0.23782,
+      "grad_norm": 1.1377767546840278,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 23782
+    },
+    {
+      "epoch": 0.23783,
+      "grad_norm": 1.132793806834327,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 23783
+    },
+    {
+      "epoch": 0.23784,
+      "grad_norm": 1.2500561050669747,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 23784
+    },
+    {
+      "epoch": 0.23785,
+      "grad_norm": 1.314526106498092,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 23785
+    },
+    {
+      "epoch": 0.23786,
+      "grad_norm": 1.3605124538030828,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 23786
+    },
+    {
+      "epoch": 0.23787,
+      "grad_norm": 1.277338269607527,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 23787
+    },
+    {
+      "epoch": 0.23788,
+      "grad_norm": 1.3186417473270358,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 23788
+    },
+    {
+      "epoch": 0.23789,
+      "grad_norm": 1.252857904957948,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 23789
+    },
+    {
+      "epoch": 0.2379,
+      "grad_norm": 1.3392459726053192,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 23790
+    },
+    {
+      "epoch": 0.23791,
+      "grad_norm": 1.2196677339423951,
+      "learning_rate": 0.003,
+      "loss": 3.976,
+      "step": 23791
+    },
+    {
+      "epoch": 0.23792,
+      "grad_norm": 1.3214836455019192,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 23792
+    },
+    {
+      "epoch": 0.23793,
+      "grad_norm": 1.101638695861977,
+      "learning_rate": 0.003,
+      "loss": 3.9841,
+      "step": 23793
+    },
+    {
+      "epoch": 0.23794,
+      "grad_norm": 1.3085280467344573,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 23794
+    },
+    {
+      "epoch": 0.23795,
+      "grad_norm": 1.2420590053361065,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 23795
+    },
+    {
+      "epoch": 0.23796,
+      "grad_norm": 1.4313400705001962,
+      "learning_rate": 0.003,
+      "loss": 4.0324,
+      "step": 23796
+    },
+    {
+      "epoch": 0.23797,
+      "grad_norm": 1.1850692089426982,
+      "learning_rate": 0.003,
+      "loss": 3.9782,
+      "step": 23797
+    },
+    {
+      "epoch": 0.23798,
+      "grad_norm": 1.40144126498327,
+      "learning_rate": 0.003,
+      "loss": 3.9811,
+      "step": 23798
+    },
+    {
+      "epoch": 0.23799,
+      "grad_norm": 1.1968733292887153,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 23799
+    },
+    {
+      "epoch": 0.238,
+      "grad_norm": 1.4609791527213651,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 23800
+    },
+    {
+      "epoch": 0.23801,
+      "grad_norm": 1.1414475533071524,
+      "learning_rate": 0.003,
+      "loss": 4.0406,
+      "step": 23801
+    },
+    {
+      "epoch": 0.23802,
+      "grad_norm": 1.4831459809631626,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 23802
+    },
+    {
+      "epoch": 0.23803,
+      "grad_norm": 1.0815418519354887,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 23803
+    },
+    {
+      "epoch": 0.23804,
+      "grad_norm": 1.3571836217197124,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 23804
+    },
+    {
+      "epoch": 0.23805,
+      "grad_norm": 1.27555829439947,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 23805
+    },
+    {
+      "epoch": 0.23806,
+      "grad_norm": 1.351248866290867,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 23806
+    },
+    {
+      "epoch": 0.23807,
+      "grad_norm": 1.1260115749814525,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 23807
+    },
+    {
+      "epoch": 0.23808,
+      "grad_norm": 1.4012186727828906,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 23808
+    },
+    {
+      "epoch": 0.23809,
+      "grad_norm": 1.4510202009835547,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 23809
+    },
+    {
+      "epoch": 0.2381,
+      "grad_norm": 1.1529865777466848,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 23810
+    },
+    {
+      "epoch": 0.23811,
+      "grad_norm": 1.2603242770863627,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 23811
+    },
+    {
+      "epoch": 0.23812,
+      "grad_norm": 1.328506365616654,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 23812
+    },
+    {
+      "epoch": 0.23813,
+      "grad_norm": 1.0192859834135166,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 23813
+    },
+    {
+      "epoch": 0.23814,
+      "grad_norm": 1.325337889115548,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 23814
+    },
+    {
+      "epoch": 0.23815,
+      "grad_norm": 1.2490764730702109,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 23815
+    },
+    {
+      "epoch": 0.23816,
+      "grad_norm": 1.2539073206413693,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 23816
+    },
+    {
+      "epoch": 0.23817,
+      "grad_norm": 1.3381144239027118,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 23817
+    },
+    {
+      "epoch": 0.23818,
+      "grad_norm": 1.3448473101598803,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 23818
+    },
+    {
+      "epoch": 0.23819,
+      "grad_norm": 1.2578605176157693,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 23819
+    },
+    {
+      "epoch": 0.2382,
+      "grad_norm": 1.1649514796569875,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 23820
+    },
+    {
+      "epoch": 0.23821,
+      "grad_norm": 1.5040376036500005,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 23821
+    },
+    {
+      "epoch": 0.23822,
+      "grad_norm": 1.1760317793203696,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 23822
+    },
+    {
+      "epoch": 0.23823,
+      "grad_norm": 1.2263343284846993,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 23823
+    },
+    {
+      "epoch": 0.23824,
+      "grad_norm": 1.3416783991077672,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 23824
+    },
+    {
+      "epoch": 0.23825,
+      "grad_norm": 1.2498742733724832,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 23825
+    },
+    {
+      "epoch": 0.23826,
+      "grad_norm": 1.3132751995551477,
+      "learning_rate": 0.003,
+      "loss": 3.9698,
+      "step": 23826
+    },
+    {
+      "epoch": 0.23827,
+      "grad_norm": 1.0603833492101007,
+      "learning_rate": 0.003,
+      "loss": 3.9713,
+      "step": 23827
+    },
+    {
+      "epoch": 0.23828,
+      "grad_norm": 1.3368467897123395,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 23828
+    },
+    {
+      "epoch": 0.23829,
+      "grad_norm": 1.1920495309951704,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 23829
+    },
+    {
+      "epoch": 0.2383,
+      "grad_norm": 1.3626370861187267,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 23830
+    },
+    {
+      "epoch": 0.23831,
+      "grad_norm": 1.1310851161221007,
+      "learning_rate": 0.003,
+      "loss": 3.9875,
+      "step": 23831
+    },
+    {
+      "epoch": 0.23832,
+      "grad_norm": 1.2923988381281586,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 23832
+    },
+    {
+      "epoch": 0.23833,
+      "grad_norm": 1.113407853967652,
+      "learning_rate": 0.003,
+      "loss": 3.9682,
+      "step": 23833
+    },
+    {
+      "epoch": 0.23834,
+      "grad_norm": 1.4814895425810017,
+      "learning_rate": 0.003,
+      "loss": 3.9705,
+      "step": 23834
+    },
+    {
+      "epoch": 0.23835,
+      "grad_norm": 1.151094046034595,
+      "learning_rate": 0.003,
+      "loss": 3.9688,
+      "step": 23835
+    },
+    {
+      "epoch": 0.23836,
+      "grad_norm": 1.4135941367652078,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 23836
+    },
+    {
+      "epoch": 0.23837,
+      "grad_norm": 1.157202006793271,
+      "learning_rate": 0.003,
+      "loss": 4.0407,
+      "step": 23837
+    },
+    {
+      "epoch": 0.23838,
+      "grad_norm": 1.4317224366665096,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 23838
+    },
+    {
+      "epoch": 0.23839,
+      "grad_norm": 1.4870878808259236,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 23839
+    },
+    {
+      "epoch": 0.2384,
+      "grad_norm": 1.2826470763376046,
+      "learning_rate": 0.003,
+      "loss": 3.9805,
+      "step": 23840
+    },
+    {
+      "epoch": 0.23841,
+      "grad_norm": 1.4241985823299168,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 23841
+    },
+    {
+      "epoch": 0.23842,
+      "grad_norm": 1.0877134307547314,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 23842
+    },
+    {
+      "epoch": 0.23843,
+      "grad_norm": 1.3016333167457974,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 23843
+    },
+    {
+      "epoch": 0.23844,
+      "grad_norm": 1.140532089073963,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 23844
+    },
+    {
+      "epoch": 0.23845,
+      "grad_norm": 1.3775023028677214,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 23845
+    },
+    {
+      "epoch": 0.23846,
+      "grad_norm": 1.131496417904938,
+      "learning_rate": 0.003,
+      "loss": 3.97,
+      "step": 23846
+    },
+    {
+      "epoch": 0.23847,
+      "grad_norm": 1.6115671439524917,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 23847
+    },
+    {
+      "epoch": 0.23848,
+      "grad_norm": 1.123664547886629,
+      "learning_rate": 0.003,
+      "loss": 3.9776,
+      "step": 23848
+    },
+    {
+      "epoch": 0.23849,
+      "grad_norm": 1.4611085131603094,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 23849
+    },
+    {
+      "epoch": 0.2385,
+      "grad_norm": 1.3154194058745667,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 23850
+    },
+    {
+      "epoch": 0.23851,
+      "grad_norm": 1.3281681927410758,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 23851
+    },
+    {
+      "epoch": 0.23852,
+      "grad_norm": 1.301941215584366,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 23852
+    },
+    {
+      "epoch": 0.23853,
+      "grad_norm": 1.401631737130536,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 23853
+    },
+    {
+      "epoch": 0.23854,
+      "grad_norm": 1.2859481634637935,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 23854
+    },
+    {
+      "epoch": 0.23855,
+      "grad_norm": 1.2972695819812963,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 23855
+    },
+    {
+      "epoch": 0.23856,
+      "grad_norm": 1.3831087478087878,
+      "learning_rate": 0.003,
+      "loss": 3.9801,
+      "step": 23856
+    },
+    {
+      "epoch": 0.23857,
+      "grad_norm": 1.1279041536055205,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 23857
+    },
+    {
+      "epoch": 0.23858,
+      "grad_norm": 1.4418041690632963,
+      "learning_rate": 0.003,
+      "loss": 4.0501,
+      "step": 23858
+    },
+    {
+      "epoch": 0.23859,
+      "grad_norm": 1.213154948682009,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 23859
+    },
+    {
+      "epoch": 0.2386,
+      "grad_norm": 1.7030202815124584,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 23860
+    },
+    {
+      "epoch": 0.23861,
+      "grad_norm": 1.0461809835461595,
+      "learning_rate": 0.003,
+      "loss": 3.9696,
+      "step": 23861
+    },
+    {
+      "epoch": 0.23862,
+      "grad_norm": 1.4240732866021655,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 23862
+    },
+    {
+      "epoch": 0.23863,
+      "grad_norm": 1.174524742854935,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 23863
+    },
+    {
+      "epoch": 0.23864,
+      "grad_norm": 1.3070301014979582,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 23864
+    },
+    {
+      "epoch": 0.23865,
+      "grad_norm": 1.238959584480321,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 23865
+    },
+    {
+      "epoch": 0.23866,
+      "grad_norm": 1.2630747651768004,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 23866
+    },
+    {
+      "epoch": 0.23867,
+      "grad_norm": 1.4607220124638765,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 23867
+    },
+    {
+      "epoch": 0.23868,
+      "grad_norm": 1.1840524053316157,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 23868
+    },
+    {
+      "epoch": 0.23869,
+      "grad_norm": 1.0867469995197978,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 23869
+    },
+    {
+      "epoch": 0.2387,
+      "grad_norm": 1.4998824232891295,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 23870
+    },
+    {
+      "epoch": 0.23871,
+      "grad_norm": 1.066016113290617,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 23871
+    },
+    {
+      "epoch": 0.23872,
+      "grad_norm": 1.39084873936716,
+      "learning_rate": 0.003,
+      "loss": 4.0112,
+      "step": 23872
+    },
+    {
+      "epoch": 0.23873,
+      "grad_norm": 1.0379607300317715,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 23873
+    },
+    {
+      "epoch": 0.23874,
+      "grad_norm": 1.4718968884960668,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 23874
+    },
+    {
+      "epoch": 0.23875,
+      "grad_norm": 1.0389726641116324,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 23875
+    },
+    {
+      "epoch": 0.23876,
+      "grad_norm": 1.5777513160286392,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 23876
+    },
+    {
+      "epoch": 0.23877,
+      "grad_norm": 1.1556996821210561,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 23877
+    },
+    {
+      "epoch": 0.23878,
+      "grad_norm": 1.515723548844844,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 23878
+    },
+    {
+      "epoch": 0.23879,
+      "grad_norm": 1.2001528976562297,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 23879
+    },
+    {
+      "epoch": 0.2388,
+      "grad_norm": 1.527347676431328,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 23880
+    },
+    {
+      "epoch": 0.23881,
+      "grad_norm": 1.0106354289705037,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 23881
+    },
+    {
+      "epoch": 0.23882,
+      "grad_norm": 1.4082189034418586,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 23882
+    },
+    {
+      "epoch": 0.23883,
+      "grad_norm": 1.5239718264509141,
+      "learning_rate": 0.003,
+      "loss": 4.0436,
+      "step": 23883
+    },
+    {
+      "epoch": 0.23884,
+      "grad_norm": 0.8896191817719431,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 23884
+    },
+    {
+      "epoch": 0.23885,
+      "grad_norm": 1.2191294923085603,
+      "learning_rate": 0.003,
+      "loss": 4.0177,
+      "step": 23885
+    },
+    {
+      "epoch": 0.23886,
+      "grad_norm": 1.5141094092348555,
+      "learning_rate": 0.003,
+      "loss": 3.9854,
+      "step": 23886
+    },
+    {
+      "epoch": 0.23887,
+      "grad_norm": 1.15537871684465,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 23887
+    },
+    {
+      "epoch": 0.23888,
+      "grad_norm": 1.367690513138021,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 23888
+    },
+    {
+      "epoch": 0.23889,
+      "grad_norm": 1.2350965745923215,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 23889
+    },
+    {
+      "epoch": 0.2389,
+      "grad_norm": 1.4464903065036159,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 23890
+    },
+    {
+      "epoch": 0.23891,
+      "grad_norm": 1.1464474657049788,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 23891
+    },
+    {
+      "epoch": 0.23892,
+      "grad_norm": 1.3634864234187367,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 23892
+    },
+    {
+      "epoch": 0.23893,
+      "grad_norm": 1.1673603595499331,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 23893
+    },
+    {
+      "epoch": 0.23894,
+      "grad_norm": 1.345129258240151,
+      "learning_rate": 0.003,
+      "loss": 4.0362,
+      "step": 23894
+    },
+    {
+      "epoch": 0.23895,
+      "grad_norm": 1.273151923861541,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 23895
+    },
+    {
+      "epoch": 0.23896,
+      "grad_norm": 1.4006945043884886,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 23896
+    },
+    {
+      "epoch": 0.23897,
+      "grad_norm": 1.2097991025461234,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 23897
+    },
+    {
+      "epoch": 0.23898,
+      "grad_norm": 1.5096343896822244,
+      "learning_rate": 0.003,
+      "loss": 4.021,
+      "step": 23898
+    },
+    {
+      "epoch": 0.23899,
+      "grad_norm": 1.2890645356872177,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 23899
+    },
+    {
+      "epoch": 0.239,
+      "grad_norm": 1.1509892348615256,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 23900
+    },
+    {
+      "epoch": 0.23901,
+      "grad_norm": 1.171120047182133,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 23901
+    },
+    {
+      "epoch": 0.23902,
+      "grad_norm": 1.4772866293166056,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 23902
+    },
+    {
+      "epoch": 0.23903,
+      "grad_norm": 0.9549553174445348,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 23903
+    },
+    {
+      "epoch": 0.23904,
+      "grad_norm": 1.4397665887359545,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 23904
+    },
+    {
+      "epoch": 0.23905,
+      "grad_norm": 0.9744736837111275,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 23905
+    },
+    {
+      "epoch": 0.23906,
+      "grad_norm": 1.6810590319267402,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 23906
+    },
+    {
+      "epoch": 0.23907,
+      "grad_norm": 0.9622346894238222,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 23907
+    },
+    {
+      "epoch": 0.23908,
+      "grad_norm": 1.272078001826868,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 23908
+    },
+    {
+      "epoch": 0.23909,
+      "grad_norm": 1.2440297524192536,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 23909
+    },
+    {
+      "epoch": 0.2391,
+      "grad_norm": 1.2734208817635764,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 23910
+    },
+    {
+      "epoch": 0.23911,
+      "grad_norm": 1.299467515547751,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 23911
+    },
+    {
+      "epoch": 0.23912,
+      "grad_norm": 1.3683288970141578,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 23912
+    },
+    {
+      "epoch": 0.23913,
+      "grad_norm": 1.203087997487004,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 23913
+    },
+    {
+      "epoch": 0.23914,
+      "grad_norm": 1.3082641053335504,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 23914
+    },
+    {
+      "epoch": 0.23915,
+      "grad_norm": 1.2755705910527568,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 23915
+    },
+    {
+      "epoch": 0.23916,
+      "grad_norm": 1.2592440803859928,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 23916
+    },
+    {
+      "epoch": 0.23917,
+      "grad_norm": 1.2051272002898448,
+      "learning_rate": 0.003,
+      "loss": 3.9799,
+      "step": 23917
+    },
+    {
+      "epoch": 0.23918,
+      "grad_norm": 1.4684475509030477,
+      "learning_rate": 0.003,
+      "loss": 3.9863,
+      "step": 23918
+    },
+    {
+      "epoch": 0.23919,
+      "grad_norm": 1.158159387729342,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 23919
+    },
+    {
+      "epoch": 0.2392,
+      "grad_norm": 1.2928757310614192,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 23920
+    },
+    {
+      "epoch": 0.23921,
+      "grad_norm": 1.3487339846232758,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 23921
+    },
+    {
+      "epoch": 0.23922,
+      "grad_norm": 1.1152181359956383,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 23922
+    },
+    {
+      "epoch": 0.23923,
+      "grad_norm": 1.3822092918070903,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 23923
+    },
+    {
+      "epoch": 0.23924,
+      "grad_norm": 1.2155026604463237,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 23924
+    },
+    {
+      "epoch": 0.23925,
+      "grad_norm": 1.3781400389425287,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 23925
+    },
+    {
+      "epoch": 0.23926,
+      "grad_norm": 0.8928675811032448,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 23926
+    },
+    {
+      "epoch": 0.23927,
+      "grad_norm": 1.315221141078099,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 23927
+    },
+    {
+      "epoch": 0.23928,
+      "grad_norm": 1.4569815097032202,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 23928
+    },
+    {
+      "epoch": 0.23929,
+      "grad_norm": 1.3006694431427306,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 23929
+    },
+    {
+      "epoch": 0.2393,
+      "grad_norm": 1.5167617603477914,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 23930
+    },
+    {
+      "epoch": 0.23931,
+      "grad_norm": 1.150680716148014,
+      "learning_rate": 0.003,
+      "loss": 4.0358,
+      "step": 23931
+    },
+    {
+      "epoch": 0.23932,
+      "grad_norm": 1.124747299027125,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 23932
+    },
+    {
+      "epoch": 0.23933,
+      "grad_norm": 1.4823379283199323,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 23933
+    },
+    {
+      "epoch": 0.23934,
+      "grad_norm": 1.152260022002289,
+      "learning_rate": 0.003,
+      "loss": 3.9756,
+      "step": 23934
+    },
+    {
+      "epoch": 0.23935,
+      "grad_norm": 1.3691934667563777,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 23935
+    },
+    {
+      "epoch": 0.23936,
+      "grad_norm": 1.1202925120696428,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 23936
+    },
+    {
+      "epoch": 0.23937,
+      "grad_norm": 1.4256944946282164,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 23937
+    },
+    {
+      "epoch": 0.23938,
+      "grad_norm": 1.1593995120098435,
+      "learning_rate": 0.003,
+      "loss": 4.026,
+      "step": 23938
+    },
+    {
+      "epoch": 0.23939,
+      "grad_norm": 1.5364176508443157,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 23939
+    },
+    {
+      "epoch": 0.2394,
+      "grad_norm": 0.9833085886885832,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 23940
+    },
+    {
+      "epoch": 0.23941,
+      "grad_norm": 1.5672839691208282,
+      "learning_rate": 0.003,
+      "loss": 3.9733,
+      "step": 23941
+    },
+    {
+      "epoch": 0.23942,
+      "grad_norm": 1.1404438177104954,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 23942
+    },
+    {
+      "epoch": 0.23943,
+      "grad_norm": 1.3999286059332403,
+      "learning_rate": 0.003,
+      "loss": 3.9783,
+      "step": 23943
+    },
+    {
+      "epoch": 0.23944,
+      "grad_norm": 1.2425584532092881,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 23944
+    },
+    {
+      "epoch": 0.23945,
+      "grad_norm": 1.3234960774267563,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 23945
+    },
+    {
+      "epoch": 0.23946,
+      "grad_norm": 1.1773838726906851,
+      "learning_rate": 0.003,
+      "loss": 3.9773,
+      "step": 23946
+    },
+    {
+      "epoch": 0.23947,
+      "grad_norm": 1.2863264109252082,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 23947
+    },
+    {
+      "epoch": 0.23948,
+      "grad_norm": 1.317683005253722,
+      "learning_rate": 0.003,
+      "loss": 3.9993,
+      "step": 23948
+    },
+    {
+      "epoch": 0.23949,
+      "grad_norm": 1.2563552974087808,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 23949
+    },
+    {
+      "epoch": 0.2395,
+      "grad_norm": 1.213902341051248,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 23950
+    },
+    {
+      "epoch": 0.23951,
+      "grad_norm": 1.488147202820488,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 23951
+    },
+    {
+      "epoch": 0.23952,
+      "grad_norm": 1.2047496609978003,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 23952
+    },
+    {
+      "epoch": 0.23953,
+      "grad_norm": 1.4240211637591471,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 23953
+    },
+    {
+      "epoch": 0.23954,
+      "grad_norm": 0.9789773746991961,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 23954
+    },
+    {
+      "epoch": 0.23955,
+      "grad_norm": 1.326832044035758,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 23955
+    },
+    {
+      "epoch": 0.23956,
+      "grad_norm": 1.2282685365484372,
+      "learning_rate": 0.003,
+      "loss": 4.0265,
+      "step": 23956
+    },
+    {
+      "epoch": 0.23957,
+      "grad_norm": 1.2717312134754588,
+      "learning_rate": 0.003,
+      "loss": 3.9794,
+      "step": 23957
+    },
+    {
+      "epoch": 0.23958,
+      "grad_norm": 1.0937130777036899,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 23958
+    },
+    {
+      "epoch": 0.23959,
+      "grad_norm": 1.6099380959637635,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 23959
+    },
+    {
+      "epoch": 0.2396,
+      "grad_norm": 1.069722450329914,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 23960
+    },
+    {
+      "epoch": 0.23961,
+      "grad_norm": 1.4457576413937474,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 23961
+    },
+    {
+      "epoch": 0.23962,
+      "grad_norm": 1.2681740538396482,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 23962
+    },
+    {
+      "epoch": 0.23963,
+      "grad_norm": 1.4328916468534574,
+      "learning_rate": 0.003,
+      "loss": 3.9685,
+      "step": 23963
+    },
+    {
+      "epoch": 0.23964,
+      "grad_norm": 1.2882102444285062,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 23964
+    },
+    {
+      "epoch": 0.23965,
+      "grad_norm": 1.2304342250956426,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 23965
+    },
+    {
+      "epoch": 0.23966,
+      "grad_norm": 1.204336053986734,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 23966
+    },
+    {
+      "epoch": 0.23967,
+      "grad_norm": 1.3788907009040465,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 23967
+    },
+    {
+      "epoch": 0.23968,
+      "grad_norm": 1.158113033901222,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 23968
+    },
+    {
+      "epoch": 0.23969,
+      "grad_norm": 1.475461625447766,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 23969
+    },
+    {
+      "epoch": 0.2397,
+      "grad_norm": 1.060934044971538,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 23970
+    },
+    {
+      "epoch": 0.23971,
+      "grad_norm": 1.3371869632867168,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 23971
+    },
+    {
+      "epoch": 0.23972,
+      "grad_norm": 1.1075098247951936,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 23972
+    },
+    {
+      "epoch": 0.23973,
+      "grad_norm": 1.418151835014132,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 23973
+    },
+    {
+      "epoch": 0.23974,
+      "grad_norm": 1.466181059086056,
+      "learning_rate": 0.003,
+      "loss": 4.0206,
+      "step": 23974
+    },
+    {
+      "epoch": 0.23975,
+      "grad_norm": 1.1548714414912111,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 23975
+    },
+    {
+      "epoch": 0.23976,
+      "grad_norm": 1.524954327626514,
+      "learning_rate": 0.003,
+      "loss": 4.0237,
+      "step": 23976
+    },
+    {
+      "epoch": 0.23977,
+      "grad_norm": 1.1120589293214385,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 23977
+    },
+    {
+      "epoch": 0.23978,
+      "grad_norm": 1.4321580706059025,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 23978
+    },
+    {
+      "epoch": 0.23979,
+      "grad_norm": 1.1827882570221855,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 23979
+    },
+    {
+      "epoch": 0.2398,
+      "grad_norm": 1.2889633422278628,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 23980
+    },
+    {
+      "epoch": 0.23981,
+      "grad_norm": 1.5021114452323376,
+      "learning_rate": 0.003,
+      "loss": 4.0394,
+      "step": 23981
+    },
+    {
+      "epoch": 0.23982,
+      "grad_norm": 1.0763779100996955,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 23982
+    },
+    {
+      "epoch": 0.23983,
+      "grad_norm": 1.4117735538153608,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 23983
+    },
+    {
+      "epoch": 0.23984,
+      "grad_norm": 1.0315469512777935,
+      "learning_rate": 0.003,
+      "loss": 4.0052,
+      "step": 23984
+    },
+    {
+      "epoch": 0.23985,
+      "grad_norm": 1.5275055099519483,
+      "learning_rate": 0.003,
+      "loss": 3.9834,
+      "step": 23985
+    },
+    {
+      "epoch": 0.23986,
+      "grad_norm": 1.1757729049989296,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 23986
+    },
+    {
+      "epoch": 0.23987,
+      "grad_norm": 1.5326669153698,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 23987
+    },
+    {
+      "epoch": 0.23988,
+      "grad_norm": 1.2261586608934456,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 23988
+    },
+    {
+      "epoch": 0.23989,
+      "grad_norm": 1.2491512705001777,
+      "learning_rate": 0.003,
+      "loss": 3.9782,
+      "step": 23989
+    },
+    {
+      "epoch": 0.2399,
+      "grad_norm": 1.3167256713972384,
+      "learning_rate": 0.003,
+      "loss": 4.0342,
+      "step": 23990
+    },
+    {
+      "epoch": 0.23991,
+      "grad_norm": 1.4125871573376303,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 23991
+    },
+    {
+      "epoch": 0.23992,
+      "grad_norm": 1.2911632776782254,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 23992
+    },
+    {
+      "epoch": 0.23993,
+      "grad_norm": 1.4360137360743892,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 23993
+    },
+    {
+      "epoch": 0.23994,
+      "grad_norm": 1.0703050713005995,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 23994
+    },
+    {
+      "epoch": 0.23995,
+      "grad_norm": 1.3868102205855282,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 23995
+    },
+    {
+      "epoch": 0.23996,
+      "grad_norm": 1.1623924419835336,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 23996
+    },
+    {
+      "epoch": 0.23997,
+      "grad_norm": 1.3520291821715311,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 23997
+    },
+    {
+      "epoch": 0.23998,
+      "grad_norm": 1.2270730872947457,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 23998
+    },
+    {
+      "epoch": 0.23999,
+      "grad_norm": 1.1601135126069164,
+      "learning_rate": 0.003,
+      "loss": 3.9806,
+      "step": 23999
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 1.227205962813098,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 24000
+    },
+    {
+      "epoch": 0.24001,
+      "grad_norm": 1.373279078711094,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 24001
+    },
+    {
+      "epoch": 0.24002,
+      "grad_norm": 1.4199869996328591,
+      "learning_rate": 0.003,
+      "loss": 3.996,
+      "step": 24002
+    },
+    {
+      "epoch": 0.24003,
+      "grad_norm": 1.2805094004626818,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 24003
+    },
+    {
+      "epoch": 0.24004,
+      "grad_norm": 1.1390534981562308,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 24004
+    },
+    {
+      "epoch": 0.24005,
+      "grad_norm": 1.3204221376350147,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 24005
+    },
+    {
+      "epoch": 0.24006,
+      "grad_norm": 1.2811128557714402,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 24006
+    },
+    {
+      "epoch": 0.24007,
+      "grad_norm": 1.225952474232812,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 24007
+    },
+    {
+      "epoch": 0.24008,
+      "grad_norm": 1.1516027233162458,
+      "learning_rate": 0.003,
+      "loss": 4.0097,
+      "step": 24008
+    },
+    {
+      "epoch": 0.24009,
+      "grad_norm": 1.3091187124576122,
+      "learning_rate": 0.003,
+      "loss": 4.0144,
+      "step": 24009
+    },
+    {
+      "epoch": 0.2401,
+      "grad_norm": 1.4427300998238237,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 24010
+    },
+    {
+      "epoch": 0.24011,
+      "grad_norm": 1.1125196878213897,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 24011
+    },
+    {
+      "epoch": 0.24012,
+      "grad_norm": 1.4809070897712622,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 24012
+    },
+    {
+      "epoch": 0.24013,
+      "grad_norm": 1.225594425916136,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 24013
+    },
+    {
+      "epoch": 0.24014,
+      "grad_norm": 1.2225020685191963,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 24014
+    },
+    {
+      "epoch": 0.24015,
+      "grad_norm": 1.4658939969389877,
+      "learning_rate": 0.003,
+      "loss": 3.9779,
+      "step": 24015
+    },
+    {
+      "epoch": 0.24016,
+      "grad_norm": 1.0796989528461853,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 24016
+    },
+    {
+      "epoch": 0.24017,
+      "grad_norm": 1.4716962252925458,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 24017
+    },
+    {
+      "epoch": 0.24018,
+      "grad_norm": 0.9451107757569338,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 24018
+    },
+    {
+      "epoch": 0.24019,
+      "grad_norm": 1.3681596536733631,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 24019
+    },
+    {
+      "epoch": 0.2402,
+      "grad_norm": 1.2187238988502807,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 24020
+    },
+    {
+      "epoch": 0.24021,
+      "grad_norm": 1.1166329698351354,
+      "learning_rate": 0.003,
+      "loss": 3.9754,
+      "step": 24021
+    },
+    {
+      "epoch": 0.24022,
+      "grad_norm": 1.574887725487201,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 24022
+    },
+    {
+      "epoch": 0.24023,
+      "grad_norm": 1.4034913898125905,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 24023
+    },
+    {
+      "epoch": 0.24024,
+      "grad_norm": 1.2509228534941288,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 24024
+    },
+    {
+      "epoch": 0.24025,
+      "grad_norm": 1.2791921199356213,
+      "learning_rate": 0.003,
+      "loss": 3.997,
+      "step": 24025
+    },
+    {
+      "epoch": 0.24026,
+      "grad_norm": 1.1227471905407065,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 24026
+    },
+    {
+      "epoch": 0.24027,
+      "grad_norm": 1.3296418609760456,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 24027
+    },
+    {
+      "epoch": 0.24028,
+      "grad_norm": 1.1535135622862795,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 24028
+    },
+    {
+      "epoch": 0.24029,
+      "grad_norm": 1.4281644818489123,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 24029
+    },
+    {
+      "epoch": 0.2403,
+      "grad_norm": 0.9601332853349601,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 24030
+    },
+    {
+      "epoch": 0.24031,
+      "grad_norm": 1.45713534724327,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 24031
+    },
+    {
+      "epoch": 0.24032,
+      "grad_norm": 1.1085066395342955,
+      "learning_rate": 0.003,
+      "loss": 3.9579,
+      "step": 24032
+    },
+    {
+      "epoch": 0.24033,
+      "grad_norm": 1.3824764007252042,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 24033
+    },
+    {
+      "epoch": 0.24034,
+      "grad_norm": 1.0997260948512073,
+      "learning_rate": 0.003,
+      "loss": 4.0321,
+      "step": 24034
+    },
+    {
+      "epoch": 0.24035,
+      "grad_norm": 1.1742490503369791,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 24035
+    },
+    {
+      "epoch": 0.24036,
+      "grad_norm": 1.2903858828049104,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 24036
+    },
+    {
+      "epoch": 0.24037,
+      "grad_norm": 1.3375004322493362,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 24037
+    },
+    {
+      "epoch": 0.24038,
+      "grad_norm": 1.0998417363358672,
+      "learning_rate": 0.003,
+      "loss": 3.9899,
+      "step": 24038
+    },
+    {
+      "epoch": 0.24039,
+      "grad_norm": 1.459071663572048,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 24039
+    },
+    {
+      "epoch": 0.2404,
+      "grad_norm": 1.137862987349615,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 24040
+    },
+    {
+      "epoch": 0.24041,
+      "grad_norm": 1.547128757752543,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 24041
+    },
+    {
+      "epoch": 0.24042,
+      "grad_norm": 1.4391366974989743,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 24042
+    },
+    {
+      "epoch": 0.24043,
+      "grad_norm": 1.160535287304703,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 24043
+    },
+    {
+      "epoch": 0.24044,
+      "grad_norm": 1.2578504957230645,
+      "learning_rate": 0.003,
+      "loss": 3.9872,
+      "step": 24044
+    },
+    {
+      "epoch": 0.24045,
+      "grad_norm": 1.36257593936573,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 24045
+    },
+    {
+      "epoch": 0.24046,
+      "grad_norm": 1.2194187861213934,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 24046
+    },
+    {
+      "epoch": 0.24047,
+      "grad_norm": 1.2203177585425915,
+      "learning_rate": 0.003,
+      "loss": 4.0162,
+      "step": 24047
+    },
+    {
+      "epoch": 0.24048,
+      "grad_norm": 1.2431590793981622,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 24048
+    },
+    {
+      "epoch": 0.24049,
+      "grad_norm": 1.2944551962915465,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 24049
+    },
+    {
+      "epoch": 0.2405,
+      "grad_norm": 1.2444253234654583,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 24050
+    },
+    {
+      "epoch": 0.24051,
+      "grad_norm": 1.4102845031437927,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 24051
+    },
+    {
+      "epoch": 0.24052,
+      "grad_norm": 1.2671622697161102,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 24052
+    },
+    {
+      "epoch": 0.24053,
+      "grad_norm": 1.3713149595813448,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 24053
+    },
+    {
+      "epoch": 0.24054,
+      "grad_norm": 1.2011199579132417,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 24054
+    },
+    {
+      "epoch": 0.24055,
+      "grad_norm": 1.2490699334262272,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 24055
+    },
+    {
+      "epoch": 0.24056,
+      "grad_norm": 1.411567607677573,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 24056
+    },
+    {
+      "epoch": 0.24057,
+      "grad_norm": 1.0637641102897148,
+      "learning_rate": 0.003,
+      "loss": 3.9696,
+      "step": 24057
+    },
+    {
+      "epoch": 0.24058,
+      "grad_norm": 1.3639425111998302,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 24058
+    },
+    {
+      "epoch": 0.24059,
+      "grad_norm": 1.187441418554796,
+      "learning_rate": 0.003,
+      "loss": 4.0155,
+      "step": 24059
+    },
+    {
+      "epoch": 0.2406,
+      "grad_norm": 1.246093063071109,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 24060
+    },
+    {
+      "epoch": 0.24061,
+      "grad_norm": 1.3240345214008316,
+      "learning_rate": 0.003,
+      "loss": 3.998,
+      "step": 24061
+    },
+    {
+      "epoch": 0.24062,
+      "grad_norm": 1.1774354420619235,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 24062
+    },
+    {
+      "epoch": 0.24063,
+      "grad_norm": 1.6527100466427562,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 24063
+    },
+    {
+      "epoch": 0.24064,
+      "grad_norm": 1.3872573757872138,
+      "learning_rate": 0.003,
+      "loss": 3.9709,
+      "step": 24064
+    },
+    {
+      "epoch": 0.24065,
+      "grad_norm": 1.2655424237177486,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 24065
+    },
+    {
+      "epoch": 0.24066,
+      "grad_norm": 1.3302649995042262,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 24066
+    },
+    {
+      "epoch": 0.24067,
+      "grad_norm": 1.0760954951016388,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 24067
+    },
+    {
+      "epoch": 0.24068,
+      "grad_norm": 1.3354457667875717,
+      "learning_rate": 0.003,
+      "loss": 3.9779,
+      "step": 24068
+    },
+    {
+      "epoch": 0.24069,
+      "grad_norm": 1.0597106322029932,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 24069
+    },
+    {
+      "epoch": 0.2407,
+      "grad_norm": 1.6671486672323337,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 24070
+    },
+    {
+      "epoch": 0.24071,
+      "grad_norm": 1.043589586441463,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 24071
+    },
+    {
+      "epoch": 0.24072,
+      "grad_norm": 1.51734764833904,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 24072
+    },
+    {
+      "epoch": 0.24073,
+      "grad_norm": 1.290401048607176,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 24073
+    },
+    {
+      "epoch": 0.24074,
+      "grad_norm": 1.8324586819586945,
+      "learning_rate": 0.003,
+      "loss": 4.059,
+      "step": 24074
+    },
+    {
+      "epoch": 0.24075,
+      "grad_norm": 1.441692545685776,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 24075
+    },
+    {
+      "epoch": 0.24076,
+      "grad_norm": 1.226927291920295,
+      "learning_rate": 0.003,
+      "loss": 4.062,
+      "step": 24076
+    },
+    {
+      "epoch": 0.24077,
+      "grad_norm": 1.2877086828260538,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 24077
+    },
+    {
+      "epoch": 0.24078,
+      "grad_norm": 1.0904862198541498,
+      "learning_rate": 0.003,
+      "loss": 4.0253,
+      "step": 24078
+    },
+    {
+      "epoch": 0.24079,
+      "grad_norm": 1.2265600554693614,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 24079
+    },
+    {
+      "epoch": 0.2408,
+      "grad_norm": 1.2560806618454798,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 24080
+    },
+    {
+      "epoch": 0.24081,
+      "grad_norm": 1.302247421086446,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 24081
+    },
+    {
+      "epoch": 0.24082,
+      "grad_norm": 1.3595123350917104,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 24082
+    },
+    {
+      "epoch": 0.24083,
+      "grad_norm": 1.163898138502864,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 24083
+    },
+    {
+      "epoch": 0.24084,
+      "grad_norm": 1.222153005419956,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 24084
+    },
+    {
+      "epoch": 0.24085,
+      "grad_norm": 1.2409171057705983,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 24085
+    },
+    {
+      "epoch": 0.24086,
+      "grad_norm": 1.1873530840351598,
+      "learning_rate": 0.003,
+      "loss": 3.9812,
+      "step": 24086
+    },
+    {
+      "epoch": 0.24087,
+      "grad_norm": 1.170422047612421,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 24087
+    },
+    {
+      "epoch": 0.24088,
+      "grad_norm": 1.2130993878256828,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 24088
+    },
+    {
+      "epoch": 0.24089,
+      "grad_norm": 1.2678472456678336,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 24089
+    },
+    {
+      "epoch": 0.2409,
+      "grad_norm": 1.3821291868175654,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 24090
+    },
+    {
+      "epoch": 0.24091,
+      "grad_norm": 1.319036745740246,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 24091
+    },
+    {
+      "epoch": 0.24092,
+      "grad_norm": 1.4217349262109664,
+      "learning_rate": 0.003,
+      "loss": 3.9847,
+      "step": 24092
+    },
+    {
+      "epoch": 0.24093,
+      "grad_norm": 1.2816874225366839,
+      "learning_rate": 0.003,
+      "loss": 3.9613,
+      "step": 24093
+    },
+    {
+      "epoch": 0.24094,
+      "grad_norm": 1.2223405739528284,
+      "learning_rate": 0.003,
+      "loss": 3.9732,
+      "step": 24094
+    },
+    {
+      "epoch": 0.24095,
+      "grad_norm": 1.3793074710579973,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 24095
+    },
+    {
+      "epoch": 0.24096,
+      "grad_norm": 1.181334384087193,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 24096
+    },
+    {
+      "epoch": 0.24097,
+      "grad_norm": 1.2882932812844592,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 24097
+    },
+    {
+      "epoch": 0.24098,
+      "grad_norm": 1.308456812477707,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 24098
+    },
+    {
+      "epoch": 0.24099,
+      "grad_norm": 1.3110691181125789,
+      "learning_rate": 0.003,
+      "loss": 4.0077,
+      "step": 24099
+    },
+    {
+      "epoch": 0.241,
+      "grad_norm": 1.0990896662229253,
+      "learning_rate": 0.003,
+      "loss": 4.0001,
+      "step": 24100
+    },
+    {
+      "epoch": 0.24101,
+      "grad_norm": 1.2411208640557185,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 24101
+    },
+    {
+      "epoch": 0.24102,
+      "grad_norm": 1.3087370765059578,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 24102
+    },
+    {
+      "epoch": 0.24103,
+      "grad_norm": 1.3454353940424721,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 24103
+    },
+    {
+      "epoch": 0.24104,
+      "grad_norm": 1.3794296801105574,
+      "learning_rate": 0.003,
+      "loss": 3.9629,
+      "step": 24104
+    },
+    {
+      "epoch": 0.24105,
+      "grad_norm": 1.23399893438939,
+      "learning_rate": 0.003,
+      "loss": 3.9916,
+      "step": 24105
+    },
+    {
+      "epoch": 0.24106,
+      "grad_norm": 1.1953608537890468,
+      "learning_rate": 0.003,
+      "loss": 4.0308,
+      "step": 24106
+    },
+    {
+      "epoch": 0.24107,
+      "grad_norm": 1.517565596815311,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 24107
+    },
+    {
+      "epoch": 0.24108,
+      "grad_norm": 1.0338675553916554,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 24108
+    },
+    {
+      "epoch": 0.24109,
+      "grad_norm": 1.680122120160264,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 24109
+    },
+    {
+      "epoch": 0.2411,
+      "grad_norm": 1.079819076011532,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 24110
+    },
+    {
+      "epoch": 0.24111,
+      "grad_norm": 1.4920599970319457,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 24111
+    },
+    {
+      "epoch": 0.24112,
+      "grad_norm": 1.0530399259980436,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 24112
+    },
+    {
+      "epoch": 0.24113,
+      "grad_norm": 1.457230640360859,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 24113
+    },
+    {
+      "epoch": 0.24114,
+      "grad_norm": 1.311826407735413,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 24114
+    },
+    {
+      "epoch": 0.24115,
+      "grad_norm": 1.2381704602887063,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 24115
+    },
+    {
+      "epoch": 0.24116,
+      "grad_norm": 1.1664833808549595,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 24116
+    },
+    {
+      "epoch": 0.24117,
+      "grad_norm": 1.320167308891772,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 24117
+    },
+    {
+      "epoch": 0.24118,
+      "grad_norm": 1.2951666489100153,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 24118
+    },
+    {
+      "epoch": 0.24119,
+      "grad_norm": 1.2593990767354946,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 24119
+    },
+    {
+      "epoch": 0.2412,
+      "grad_norm": 1.1587512442028352,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 24120
+    },
+    {
+      "epoch": 0.24121,
+      "grad_norm": 1.4249693987329992,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 24121
+    },
+    {
+      "epoch": 0.24122,
+      "grad_norm": 1.17351986576644,
+      "learning_rate": 0.003,
+      "loss": 3.9601,
+      "step": 24122
+    },
+    {
+      "epoch": 0.24123,
+      "grad_norm": 1.6011969955800005,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 24123
+    },
+    {
+      "epoch": 0.24124,
+      "grad_norm": 1.3189179583121202,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 24124
+    },
+    {
+      "epoch": 0.24125,
+      "grad_norm": 1.290448954536397,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 24125
+    },
+    {
+      "epoch": 0.24126,
+      "grad_norm": 1.1530973230011488,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 24126
+    },
+    {
+      "epoch": 0.24127,
+      "grad_norm": 1.1826620637347764,
+      "learning_rate": 0.003,
+      "loss": 3.967,
+      "step": 24127
+    },
+    {
+      "epoch": 0.24128,
+      "grad_norm": 1.3964320846613578,
+      "learning_rate": 0.003,
+      "loss": 4.0247,
+      "step": 24128
+    },
+    {
+      "epoch": 0.24129,
+      "grad_norm": 0.9666291338867005,
+      "learning_rate": 0.003,
+      "loss": 4.0078,
+      "step": 24129
+    },
+    {
+      "epoch": 0.2413,
+      "grad_norm": 1.2384271984717812,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 24130
+    },
+    {
+      "epoch": 0.24131,
+      "grad_norm": 1.2240522864896297,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 24131
+    },
+    {
+      "epoch": 0.24132,
+      "grad_norm": 1.0894954168368032,
+      "learning_rate": 0.003,
+      "loss": 3.9807,
+      "step": 24132
+    },
+    {
+      "epoch": 0.24133,
+      "grad_norm": 1.512404193094899,
+      "learning_rate": 0.003,
+      "loss": 3.985,
+      "step": 24133
+    },
+    {
+      "epoch": 0.24134,
+      "grad_norm": 1.1197287175829125,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 24134
+    },
+    {
+      "epoch": 0.24135,
+      "grad_norm": 1.4834400513551278,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 24135
+    },
+    {
+      "epoch": 0.24136,
+      "grad_norm": 1.051064703421356,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 24136
+    },
+    {
+      "epoch": 0.24137,
+      "grad_norm": 1.4948225753719204,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 24137
+    },
+    {
+      "epoch": 0.24138,
+      "grad_norm": 1.125568220081877,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 24138
+    },
+    {
+      "epoch": 0.24139,
+      "grad_norm": 1.3106369315158188,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 24139
+    },
+    {
+      "epoch": 0.2414,
+      "grad_norm": 1.2270936188136845,
+      "learning_rate": 0.003,
+      "loss": 3.9931,
+      "step": 24140
+    },
+    {
+      "epoch": 0.24141,
+      "grad_norm": 1.217376254524214,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 24141
+    },
+    {
+      "epoch": 0.24142,
+      "grad_norm": 1.350824399921246,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 24142
+    },
+    {
+      "epoch": 0.24143,
+      "grad_norm": 1.2782913527529243,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 24143
+    },
+    {
+      "epoch": 0.24144,
+      "grad_norm": 1.2509801181562796,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 24144
+    },
+    {
+      "epoch": 0.24145,
+      "grad_norm": 1.4850770930022679,
+      "learning_rate": 0.003,
+      "loss": 3.9981,
+      "step": 24145
+    },
+    {
+      "epoch": 0.24146,
+      "grad_norm": 1.2440424721432985,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 24146
+    },
+    {
+      "epoch": 0.24147,
+      "grad_norm": 1.2594792898388485,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 24147
+    },
+    {
+      "epoch": 0.24148,
+      "grad_norm": 1.222322475826804,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 24148
+    },
+    {
+      "epoch": 0.24149,
+      "grad_norm": 1.1542473639584763,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 24149
+    },
+    {
+      "epoch": 0.2415,
+      "grad_norm": 1.2498502347174663,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 24150
+    },
+    {
+      "epoch": 0.24151,
+      "grad_norm": 1.2395465187751582,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 24151
+    },
+    {
+      "epoch": 0.24152,
+      "grad_norm": 1.4225103415302256,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 24152
+    },
+    {
+      "epoch": 0.24153,
+      "grad_norm": 1.4011719412907169,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 24153
+    },
+    {
+      "epoch": 0.24154,
+      "grad_norm": 1.1441888312607136,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 24154
+    },
+    {
+      "epoch": 0.24155,
+      "grad_norm": 1.327692940712204,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 24155
+    },
+    {
+      "epoch": 0.24156,
+      "grad_norm": 1.4461020211018745,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 24156
+    },
+    {
+      "epoch": 0.24157,
+      "grad_norm": 1.2933828833590288,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 24157
+    },
+    {
+      "epoch": 0.24158,
+      "grad_norm": 1.3247004793893185,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 24158
+    },
+    {
+      "epoch": 0.24159,
+      "grad_norm": 1.2305449573149332,
+      "learning_rate": 0.003,
+      "loss": 4.0042,
+      "step": 24159
+    },
+    {
+      "epoch": 0.2416,
+      "grad_norm": 1.3660761136910773,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 24160
+    },
+    {
+      "epoch": 0.24161,
+      "grad_norm": 1.2830549152737192,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 24161
+    },
+    {
+      "epoch": 0.24162,
+      "grad_norm": 1.2361590601900971,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 24162
+    },
+    {
+      "epoch": 0.24163,
+      "grad_norm": 1.2619457498477704,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 24163
+    },
+    {
+      "epoch": 0.24164,
+      "grad_norm": 1.2206062099137558,
+      "learning_rate": 0.003,
+      "loss": 3.964,
+      "step": 24164
+    },
+    {
+      "epoch": 0.24165,
+      "grad_norm": 1.301816189846531,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 24165
+    },
+    {
+      "epoch": 0.24166,
+      "grad_norm": 1.5295606001768185,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 24166
+    },
+    {
+      "epoch": 0.24167,
+      "grad_norm": 1.0898280529300612,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 24167
+    },
+    {
+      "epoch": 0.24168,
+      "grad_norm": 1.3289237186342768,
+      "learning_rate": 0.003,
+      "loss": 4.043,
+      "step": 24168
+    },
+    {
+      "epoch": 0.24169,
+      "grad_norm": 1.05018881791721,
+      "learning_rate": 0.003,
+      "loss": 3.9805,
+      "step": 24169
+    },
+    {
+      "epoch": 0.2417,
+      "grad_norm": 1.345294324927424,
+      "learning_rate": 0.003,
+      "loss": 3.9996,
+      "step": 24170
+    },
+    {
+      "epoch": 0.24171,
+      "grad_norm": 1.2551936261349765,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 24171
+    },
+    {
+      "epoch": 0.24172,
+      "grad_norm": 1.458424786857544,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 24172
+    },
+    {
+      "epoch": 0.24173,
+      "grad_norm": 1.3382087209627367,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 24173
+    },
+    {
+      "epoch": 0.24174,
+      "grad_norm": 1.3832094885126691,
+      "learning_rate": 0.003,
+      "loss": 3.9694,
+      "step": 24174
+    },
+    {
+      "epoch": 0.24175,
+      "grad_norm": 1.1766255861862356,
+      "learning_rate": 0.003,
+      "loss": 3.9973,
+      "step": 24175
+    },
+    {
+      "epoch": 0.24176,
+      "grad_norm": 1.390750899722205,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 24176
+    },
+    {
+      "epoch": 0.24177,
+      "grad_norm": 1.0943146212335126,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 24177
+    },
+    {
+      "epoch": 0.24178,
+      "grad_norm": 1.3529723830987217,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 24178
+    },
+    {
+      "epoch": 0.24179,
+      "grad_norm": 1.1342602218953872,
+      "learning_rate": 0.003,
+      "loss": 3.9759,
+      "step": 24179
+    },
+    {
+      "epoch": 0.2418,
+      "grad_norm": 1.2069915185802274,
+      "learning_rate": 0.003,
+      "loss": 3.9914,
+      "step": 24180
+    },
+    {
+      "epoch": 0.24181,
+      "grad_norm": 1.2100751479764567,
+      "learning_rate": 0.003,
+      "loss": 3.9718,
+      "step": 24181
+    },
+    {
+      "epoch": 0.24182,
+      "grad_norm": 1.4054719298479592,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 24182
+    },
+    {
+      "epoch": 0.24183,
+      "grad_norm": 1.0105782851901894,
+      "learning_rate": 0.003,
+      "loss": 3.9754,
+      "step": 24183
+    },
+    {
+      "epoch": 0.24184,
+      "grad_norm": 1.3241078678052367,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 24184
+    },
+    {
+      "epoch": 0.24185,
+      "grad_norm": 1.1455738469584729,
+      "learning_rate": 0.003,
+      "loss": 3.9793,
+      "step": 24185
+    },
+    {
+      "epoch": 0.24186,
+      "grad_norm": 1.3106100036520203,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 24186
+    },
+    {
+      "epoch": 0.24187,
+      "grad_norm": 1.2475536766156146,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 24187
+    },
+    {
+      "epoch": 0.24188,
+      "grad_norm": 1.3992104686638014,
+      "learning_rate": 0.003,
+      "loss": 4.0252,
+      "step": 24188
+    },
+    {
+      "epoch": 0.24189,
+      "grad_norm": 1.2078384758490144,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 24189
+    },
+    {
+      "epoch": 0.2419,
+      "grad_norm": 1.2972821666243688,
+      "learning_rate": 0.003,
+      "loss": 4.018,
+      "step": 24190
+    },
+    {
+      "epoch": 0.24191,
+      "grad_norm": 1.2399980115118858,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 24191
+    },
+    {
+      "epoch": 0.24192,
+      "grad_norm": 1.2819185940105668,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 24192
+    },
+    {
+      "epoch": 0.24193,
+      "grad_norm": 1.3250477086069232,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 24193
+    },
+    {
+      "epoch": 0.24194,
+      "grad_norm": 1.0089381957636598,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 24194
+    },
+    {
+      "epoch": 0.24195,
+      "grad_norm": 1.5135865411071787,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 24195
+    },
+    {
+      "epoch": 0.24196,
+      "grad_norm": 1.1124622384173501,
+      "learning_rate": 0.003,
+      "loss": 4.0435,
+      "step": 24196
+    },
+    {
+      "epoch": 0.24197,
+      "grad_norm": 1.6354441763795529,
+      "learning_rate": 0.003,
+      "loss": 3.9984,
+      "step": 24197
+    },
+    {
+      "epoch": 0.24198,
+      "grad_norm": 1.1424460247478212,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 24198
+    },
+    {
+      "epoch": 0.24199,
+      "grad_norm": 1.2714981166300368,
+      "learning_rate": 0.003,
+      "loss": 4.0046,
+      "step": 24199
+    },
+    {
+      "epoch": 0.242,
+      "grad_norm": 1.282921704309831,
+      "learning_rate": 0.003,
+      "loss": 4.0298,
+      "step": 24200
+    },
+    {
+      "epoch": 0.24201,
+      "grad_norm": 1.5530604233598977,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 24201
+    },
+    {
+      "epoch": 0.24202,
+      "grad_norm": 1.2926991151160696,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 24202
+    },
+    {
+      "epoch": 0.24203,
+      "grad_norm": 1.3331459521570448,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 24203
+    },
+    {
+      "epoch": 0.24204,
+      "grad_norm": 1.3736301850195587,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 24204
+    },
+    {
+      "epoch": 0.24205,
+      "grad_norm": 1.0597872275977052,
+      "learning_rate": 0.003,
+      "loss": 3.9829,
+      "step": 24205
+    },
+    {
+      "epoch": 0.24206,
+      "grad_norm": 1.3577186538588275,
+      "learning_rate": 0.003,
+      "loss": 3.9698,
+      "step": 24206
+    },
+    {
+      "epoch": 0.24207,
+      "grad_norm": 1.3329070595863608,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 24207
+    },
+    {
+      "epoch": 0.24208,
+      "grad_norm": 1.3549883593083465,
+      "learning_rate": 0.003,
+      "loss": 4.0143,
+      "step": 24208
+    },
+    {
+      "epoch": 0.24209,
+      "grad_norm": 1.4249514541751218,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 24209
+    },
+    {
+      "epoch": 0.2421,
+      "grad_norm": 1.2554446779718282,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 24210
+    },
+    {
+      "epoch": 0.24211,
+      "grad_norm": 1.2415654378670002,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 24211
+    },
+    {
+      "epoch": 0.24212,
+      "grad_norm": 1.4210593433681382,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 24212
+    },
+    {
+      "epoch": 0.24213,
+      "grad_norm": 1.0826297263828262,
+      "learning_rate": 0.003,
+      "loss": 4.0263,
+      "step": 24213
+    },
+    {
+      "epoch": 0.24214,
+      "grad_norm": 1.4270304021470281,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 24214
+    },
+    {
+      "epoch": 0.24215,
+      "grad_norm": 1.0196608721756761,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 24215
+    },
+    {
+      "epoch": 0.24216,
+      "grad_norm": 1.3993911409198008,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 24216
+    },
+    {
+      "epoch": 0.24217,
+      "grad_norm": 1.1095018822835812,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 24217
+    },
+    {
+      "epoch": 0.24218,
+      "grad_norm": 1.397362132945264,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 24218
+    },
+    {
+      "epoch": 0.24219,
+      "grad_norm": 1.176821546171383,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 24219
+    },
+    {
+      "epoch": 0.2422,
+      "grad_norm": 1.1951327192458712,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 24220
+    },
+    {
+      "epoch": 0.24221,
+      "grad_norm": 1.406464672554473,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 24221
+    },
+    {
+      "epoch": 0.24222,
+      "grad_norm": 1.480346552419561,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 24222
+    },
+    {
+      "epoch": 0.24223,
+      "grad_norm": 1.4481149770728632,
+      "learning_rate": 0.003,
+      "loss": 3.9809,
+      "step": 24223
+    },
+    {
+      "epoch": 0.24224,
+      "grad_norm": 1.086886352987646,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 24224
+    },
+    {
+      "epoch": 0.24225,
+      "grad_norm": 1.2427138898744148,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 24225
+    },
+    {
+      "epoch": 0.24226,
+      "grad_norm": 1.0181420982800222,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 24226
+    },
+    {
+      "epoch": 0.24227,
+      "grad_norm": 1.3676344478625133,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 24227
+    },
+    {
+      "epoch": 0.24228,
+      "grad_norm": 1.1254447922314936,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 24228
+    },
+    {
+      "epoch": 0.24229,
+      "grad_norm": 1.3583829365953681,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 24229
+    },
+    {
+      "epoch": 0.2423,
+      "grad_norm": 1.4050310721887904,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 24230
+    },
+    {
+      "epoch": 0.24231,
+      "grad_norm": 1.1438891805413836,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 24231
+    },
+    {
+      "epoch": 0.24232,
+      "grad_norm": 1.3079967830377055,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 24232
+    },
+    {
+      "epoch": 0.24233,
+      "grad_norm": 1.1124583087580002,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 24233
+    },
+    {
+      "epoch": 0.24234,
+      "grad_norm": 1.2922358225045152,
+      "learning_rate": 0.003,
+      "loss": 4.0267,
+      "step": 24234
+    },
+    {
+      "epoch": 0.24235,
+      "grad_norm": 1.1092143665895888,
+      "learning_rate": 0.003,
+      "loss": 3.9585,
+      "step": 24235
+    },
+    {
+      "epoch": 0.24236,
+      "grad_norm": 1.580912137481568,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 24236
+    },
+    {
+      "epoch": 0.24237,
+      "grad_norm": 1.0071440104460954,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 24237
+    },
+    {
+      "epoch": 0.24238,
+      "grad_norm": 1.5819348231049781,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 24238
+    },
+    {
+      "epoch": 0.24239,
+      "grad_norm": 1.1659322289400957,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 24239
+    },
+    {
+      "epoch": 0.2424,
+      "grad_norm": 1.5389806602289353,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 24240
+    },
+    {
+      "epoch": 0.24241,
+      "grad_norm": 1.2672588268892024,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 24241
+    },
+    {
+      "epoch": 0.24242,
+      "grad_norm": 1.4036300620957065,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 24242
+    },
+    {
+      "epoch": 0.24243,
+      "grad_norm": 1.1775111147811212,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 24243
+    },
+    {
+      "epoch": 0.24244,
+      "grad_norm": 1.522693526218825,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 24244
+    },
+    {
+      "epoch": 0.24245,
+      "grad_norm": 1.4014419087320238,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 24245
+    },
+    {
+      "epoch": 0.24246,
+      "grad_norm": 1.4126587575267704,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 24246
+    },
+    {
+      "epoch": 0.24247,
+      "grad_norm": 1.2498324933083844,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 24247
+    },
+    {
+      "epoch": 0.24248,
+      "grad_norm": 1.2626038204574848,
+      "learning_rate": 0.003,
+      "loss": 4.0258,
+      "step": 24248
+    },
+    {
+      "epoch": 0.24249,
+      "grad_norm": 1.188801076068019,
+      "learning_rate": 0.003,
+      "loss": 3.9997,
+      "step": 24249
+    },
+    {
+      "epoch": 0.2425,
+      "grad_norm": 1.3030013119141315,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 24250
+    },
+    {
+      "epoch": 0.24251,
+      "grad_norm": 1.247500690440646,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 24251
+    },
+    {
+      "epoch": 0.24252,
+      "grad_norm": 1.3779174020672684,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 24252
+    },
+    {
+      "epoch": 0.24253,
+      "grad_norm": 1.2977502087860697,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 24253
+    },
+    {
+      "epoch": 0.24254,
+      "grad_norm": 1.2989538736770816,
+      "learning_rate": 0.003,
+      "loss": 3.9943,
+      "step": 24254
+    },
+    {
+      "epoch": 0.24255,
+      "grad_norm": 1.0112682071072827,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 24255
+    },
+    {
+      "epoch": 0.24256,
+      "grad_norm": 1.223671203799566,
+      "learning_rate": 0.003,
+      "loss": 3.9856,
+      "step": 24256
+    },
+    {
+      "epoch": 0.24257,
+      "grad_norm": 1.2328841137294158,
+      "learning_rate": 0.003,
+      "loss": 3.9821,
+      "step": 24257
+    },
+    {
+      "epoch": 0.24258,
+      "grad_norm": 1.173124335947438,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 24258
+    },
+    {
+      "epoch": 0.24259,
+      "grad_norm": 1.44321597893089,
+      "learning_rate": 0.003,
+      "loss": 3.9626,
+      "step": 24259
+    },
+    {
+      "epoch": 0.2426,
+      "grad_norm": 1.0409958265783836,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 24260
+    },
+    {
+      "epoch": 0.24261,
+      "grad_norm": 1.4113568787183663,
+      "learning_rate": 0.003,
+      "loss": 4.0095,
+      "step": 24261
+    },
+    {
+      "epoch": 0.24262,
+      "grad_norm": 1.0441273336220465,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 24262
+    },
+    {
+      "epoch": 0.24263,
+      "grad_norm": 1.500155681422747,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 24263
+    },
+    {
+      "epoch": 0.24264,
+      "grad_norm": 1.2792869549418553,
+      "learning_rate": 0.003,
+      "loss": 4.0455,
+      "step": 24264
+    },
+    {
+      "epoch": 0.24265,
+      "grad_norm": 1.316300758861995,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 24265
+    },
+    {
+      "epoch": 0.24266,
+      "grad_norm": 1.3085208308967156,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 24266
+    },
+    {
+      "epoch": 0.24267,
+      "grad_norm": 1.1884597760130713,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 24267
+    },
+    {
+      "epoch": 0.24268,
+      "grad_norm": 1.3706254658457726,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 24268
+    },
+    {
+      "epoch": 0.24269,
+      "grad_norm": 1.1418910667561124,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 24269
+    },
+    {
+      "epoch": 0.2427,
+      "grad_norm": 1.3286987167784483,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 24270
+    },
+    {
+      "epoch": 0.24271,
+      "grad_norm": 1.1042820419071429,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 24271
+    },
+    {
+      "epoch": 0.24272,
+      "grad_norm": 1.3213486811412363,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 24272
+    },
+    {
+      "epoch": 0.24273,
+      "grad_norm": 1.1902761910934856,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 24273
+    },
+    {
+      "epoch": 0.24274,
+      "grad_norm": 1.567635346410316,
+      "learning_rate": 0.003,
+      "loss": 4.0241,
+      "step": 24274
+    },
+    {
+      "epoch": 0.24275,
+      "grad_norm": 1.2005589523602915,
+      "learning_rate": 0.003,
+      "loss": 3.99,
+      "step": 24275
+    },
+    {
+      "epoch": 0.24276,
+      "grad_norm": 1.0966954702850007,
+      "learning_rate": 0.003,
+      "loss": 3.9907,
+      "step": 24276
+    },
+    {
+      "epoch": 0.24277,
+      "grad_norm": 1.3073977353357855,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 24277
+    },
+    {
+      "epoch": 0.24278,
+      "grad_norm": 1.2326235968708692,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 24278
+    },
+    {
+      "epoch": 0.24279,
+      "grad_norm": 1.2010886610998857,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 24279
+    },
+    {
+      "epoch": 0.2428,
+      "grad_norm": 1.2098808314499239,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 24280
+    },
+    {
+      "epoch": 0.24281,
+      "grad_norm": 1.3213920987918337,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 24281
+    },
+    {
+      "epoch": 0.24282,
+      "grad_norm": 1.1858270858463267,
+      "learning_rate": 0.003,
+      "loss": 3.978,
+      "step": 24282
+    },
+    {
+      "epoch": 0.24283,
+      "grad_norm": 1.2319408535087508,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 24283
+    },
+    {
+      "epoch": 0.24284,
+      "grad_norm": 1.4694003904655955,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 24284
+    },
+    {
+      "epoch": 0.24285,
+      "grad_norm": 1.1194655920888879,
+      "learning_rate": 0.003,
+      "loss": 3.9709,
+      "step": 24285
+    },
+    {
+      "epoch": 0.24286,
+      "grad_norm": 1.2272982979212796,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 24286
+    },
+    {
+      "epoch": 0.24287,
+      "grad_norm": 1.313632308661914,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 24287
+    },
+    {
+      "epoch": 0.24288,
+      "grad_norm": 1.2156624105395946,
+      "learning_rate": 0.003,
+      "loss": 3.9732,
+      "step": 24288
+    },
+    {
+      "epoch": 0.24289,
+      "grad_norm": 1.3604433769520767,
+      "learning_rate": 0.003,
+      "loss": 3.9971,
+      "step": 24289
+    },
+    {
+      "epoch": 0.2429,
+      "grad_norm": 1.1918012715723885,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 24290
+    },
+    {
+      "epoch": 0.24291,
+      "grad_norm": 1.4210136212683475,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 24291
+    },
+    {
+      "epoch": 0.24292,
+      "grad_norm": 1.0618839514006881,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 24292
+    },
+    {
+      "epoch": 0.24293,
+      "grad_norm": 1.4356886998736293,
+      "learning_rate": 0.003,
+      "loss": 3.9624,
+      "step": 24293
+    },
+    {
+      "epoch": 0.24294,
+      "grad_norm": 1.1501206023642496,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 24294
+    },
+    {
+      "epoch": 0.24295,
+      "grad_norm": 1.2759834488148594,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 24295
+    },
+    {
+      "epoch": 0.24296,
+      "grad_norm": 1.373149376159708,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 24296
+    },
+    {
+      "epoch": 0.24297,
+      "grad_norm": 1.418141680211911,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 24297
+    },
+    {
+      "epoch": 0.24298,
+      "grad_norm": 1.153091629878988,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 24298
+    },
+    {
+      "epoch": 0.24299,
+      "grad_norm": 1.4037006163417747,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 24299
+    },
+    {
+      "epoch": 0.243,
+      "grad_norm": 1.1925564570567497,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 24300
+    },
+    {
+      "epoch": 0.24301,
+      "grad_norm": 1.40308492908008,
+      "learning_rate": 0.003,
+      "loss": 3.9689,
+      "step": 24301
+    },
+    {
+      "epoch": 0.24302,
+      "grad_norm": 1.3105859567718003,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 24302
+    },
+    {
+      "epoch": 0.24303,
+      "grad_norm": 1.218534319371849,
+      "learning_rate": 0.003,
+      "loss": 4.0204,
+      "step": 24303
+    },
+    {
+      "epoch": 0.24304,
+      "grad_norm": 1.2852184695667128,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 24304
+    },
+    {
+      "epoch": 0.24305,
+      "grad_norm": 1.1507495401616317,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 24305
+    },
+    {
+      "epoch": 0.24306,
+      "grad_norm": 1.5141694171720639,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 24306
+    },
+    {
+      "epoch": 0.24307,
+      "grad_norm": 1.3689362995845964,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 24307
+    },
+    {
+      "epoch": 0.24308,
+      "grad_norm": 1.5031421120255246,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 24308
+    },
+    {
+      "epoch": 0.24309,
+      "grad_norm": 1.0452218607412926,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 24309
+    },
+    {
+      "epoch": 0.2431,
+      "grad_norm": 1.2385693845724137,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 24310
+    },
+    {
+      "epoch": 0.24311,
+      "grad_norm": 1.2751466154762958,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 24311
+    },
+    {
+      "epoch": 0.24312,
+      "grad_norm": 1.1588864812669948,
+      "learning_rate": 0.003,
+      "loss": 3.9687,
+      "step": 24312
+    },
+    {
+      "epoch": 0.24313,
+      "grad_norm": 1.3283726644502272,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 24313
+    },
+    {
+      "epoch": 0.24314,
+      "grad_norm": 1.2177853899290105,
+      "learning_rate": 0.003,
+      "loss": 3.9773,
+      "step": 24314
+    },
+    {
+      "epoch": 0.24315,
+      "grad_norm": 1.5908274177197597,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 24315
+    },
+    {
+      "epoch": 0.24316,
+      "grad_norm": 1.2017672334308052,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 24316
+    },
+    {
+      "epoch": 0.24317,
+      "grad_norm": 1.2503171458732973,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 24317
+    },
+    {
+      "epoch": 0.24318,
+      "grad_norm": 1.1592633803465389,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 24318
+    },
+    {
+      "epoch": 0.24319,
+      "grad_norm": 1.270896830243061,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 24319
+    },
+    {
+      "epoch": 0.2432,
+      "grad_norm": 1.2852044049316775,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 24320
+    },
+    {
+      "epoch": 0.24321,
+      "grad_norm": 1.1289722479443345,
+      "learning_rate": 0.003,
+      "loss": 3.9755,
+      "step": 24321
+    },
+    {
+      "epoch": 0.24322,
+      "grad_norm": 1.511993927353488,
+      "learning_rate": 0.003,
+      "loss": 4.0226,
+      "step": 24322
+    },
+    {
+      "epoch": 0.24323,
+      "grad_norm": 1.2435203173683225,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 24323
+    },
+    {
+      "epoch": 0.24324,
+      "grad_norm": 1.2670008471230054,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 24324
+    },
+    {
+      "epoch": 0.24325,
+      "grad_norm": 1.3988713647496716,
+      "learning_rate": 0.003,
+      "loss": 3.9901,
+      "step": 24325
+    },
+    {
+      "epoch": 0.24326,
+      "grad_norm": 1.1989574579224378,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 24326
+    },
+    {
+      "epoch": 0.24327,
+      "grad_norm": 1.3251032016198945,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 24327
+    },
+    {
+      "epoch": 0.24328,
+      "grad_norm": 1.2338473684311857,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 24328
+    },
+    {
+      "epoch": 0.24329,
+      "grad_norm": 1.3960675696266212,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 24329
+    },
+    {
+      "epoch": 0.2433,
+      "grad_norm": 1.2752366750987398,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 24330
+    },
+    {
+      "epoch": 0.24331,
+      "grad_norm": 1.1981873816500621,
+      "learning_rate": 0.003,
+      "loss": 3.9737,
+      "step": 24331
+    },
+    {
+      "epoch": 0.24332,
+      "grad_norm": 1.4074443858614525,
+      "learning_rate": 0.003,
+      "loss": 4.0273,
+      "step": 24332
+    },
+    {
+      "epoch": 0.24333,
+      "grad_norm": 1.5293059600816015,
+      "learning_rate": 0.003,
+      "loss": 3.9854,
+      "step": 24333
+    },
+    {
+      "epoch": 0.24334,
+      "grad_norm": 0.9823305104797105,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 24334
+    },
+    {
+      "epoch": 0.24335,
+      "grad_norm": 1.3622104334442773,
+      "learning_rate": 0.003,
+      "loss": 4.004,
+      "step": 24335
+    },
+    {
+      "epoch": 0.24336,
+      "grad_norm": 1.2185726968881212,
+      "learning_rate": 0.003,
+      "loss": 3.9801,
+      "step": 24336
+    },
+    {
+      "epoch": 0.24337,
+      "grad_norm": 1.1605444142293981,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 24337
+    },
+    {
+      "epoch": 0.24338,
+      "grad_norm": 1.281828120744372,
+      "learning_rate": 0.003,
+      "loss": 4.0235,
+      "step": 24338
+    },
+    {
+      "epoch": 0.24339,
+      "grad_norm": 1.3591347295733012,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 24339
+    },
+    {
+      "epoch": 0.2434,
+      "grad_norm": 1.3247094939167927,
+      "learning_rate": 0.003,
+      "loss": 3.983,
+      "step": 24340
+    },
+    {
+      "epoch": 0.24341,
+      "grad_norm": 1.2246542472827677,
+      "learning_rate": 0.003,
+      "loss": 3.975,
+      "step": 24341
+    },
+    {
+      "epoch": 0.24342,
+      "grad_norm": 1.3080626594209288,
+      "learning_rate": 0.003,
+      "loss": 3.9816,
+      "step": 24342
+    },
+    {
+      "epoch": 0.24343,
+      "grad_norm": 1.4378310191285855,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 24343
+    },
+    {
+      "epoch": 0.24344,
+      "grad_norm": 1.5318215241625899,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 24344
+    },
+    {
+      "epoch": 0.24345,
+      "grad_norm": 1.0738007728475194,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 24345
+    },
+    {
+      "epoch": 0.24346,
+      "grad_norm": 1.313594316042918,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 24346
+    },
+    {
+      "epoch": 0.24347,
+      "grad_norm": 1.2235624479272187,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 24347
+    },
+    {
+      "epoch": 0.24348,
+      "grad_norm": 1.1292325507753453,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 24348
+    },
+    {
+      "epoch": 0.24349,
+      "grad_norm": 1.394689425890914,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 24349
+    },
+    {
+      "epoch": 0.2435,
+      "grad_norm": 1.3069454231798265,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 24350
+    },
+    {
+      "epoch": 0.24351,
+      "grad_norm": 1.185003909473256,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 24351
+    },
+    {
+      "epoch": 0.24352,
+      "grad_norm": 1.3113412084498477,
+      "learning_rate": 0.003,
+      "loss": 4.02,
+      "step": 24352
+    },
+    {
+      "epoch": 0.24353,
+      "grad_norm": 1.115870032158001,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 24353
+    },
+    {
+      "epoch": 0.24354,
+      "grad_norm": 1.502411615129219,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 24354
+    },
+    {
+      "epoch": 0.24355,
+      "grad_norm": 1.0942168062859534,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 24355
+    },
+    {
+      "epoch": 0.24356,
+      "grad_norm": 1.3589652682721036,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 24356
+    },
+    {
+      "epoch": 0.24357,
+      "grad_norm": 1.1377925398927595,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 24357
+    },
+    {
+      "epoch": 0.24358,
+      "grad_norm": 1.182331949901294,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 24358
+    },
+    {
+      "epoch": 0.24359,
+      "grad_norm": 1.3634158908075005,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 24359
+    },
+    {
+      "epoch": 0.2436,
+      "grad_norm": 1.2765530427200424,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 24360
+    },
+    {
+      "epoch": 0.24361,
+      "grad_norm": 1.1308114449518833,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 24361
+    },
+    {
+      "epoch": 0.24362,
+      "grad_norm": 1.4290241616859394,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 24362
+    },
+    {
+      "epoch": 0.24363,
+      "grad_norm": 1.0381113257081345,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 24363
+    },
+    {
+      "epoch": 0.24364,
+      "grad_norm": 1.5590705825757918,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 24364
+    },
+    {
+      "epoch": 0.24365,
+      "grad_norm": 1.3268581512786095,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 24365
+    },
+    {
+      "epoch": 0.24366,
+      "grad_norm": 1.5048389428683266,
+      "learning_rate": 0.003,
+      "loss": 4.0153,
+      "step": 24366
+    },
+    {
+      "epoch": 0.24367,
+      "grad_norm": 1.1339578323430541,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 24367
+    },
+    {
+      "epoch": 0.24368,
+      "grad_norm": 1.406364278069037,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 24368
+    },
+    {
+      "epoch": 0.24369,
+      "grad_norm": 1.2613402679252856,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 24369
+    },
+    {
+      "epoch": 0.2437,
+      "grad_norm": 1.3078550941344107,
+      "learning_rate": 0.003,
+      "loss": 3.976,
+      "step": 24370
+    },
+    {
+      "epoch": 0.24371,
+      "grad_norm": 1.1169504497228377,
+      "learning_rate": 0.003,
+      "loss": 4.0187,
+      "step": 24371
+    },
+    {
+      "epoch": 0.24372,
+      "grad_norm": 1.3223866962154005,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 24372
+    },
+    {
+      "epoch": 0.24373,
+      "grad_norm": 1.1402993419498877,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 24373
+    },
+    {
+      "epoch": 0.24374,
+      "grad_norm": 1.5073023453780978,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 24374
+    },
+    {
+      "epoch": 0.24375,
+      "grad_norm": 1.3130387017275593,
+      "learning_rate": 0.003,
+      "loss": 3.9961,
+      "step": 24375
+    },
+    {
+      "epoch": 0.24376,
+      "grad_norm": 1.197736562000046,
+      "learning_rate": 0.003,
+      "loss": 3.9788,
+      "step": 24376
+    },
+    {
+      "epoch": 0.24377,
+      "grad_norm": 1.3706528218008787,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 24377
+    },
+    {
+      "epoch": 0.24378,
+      "grad_norm": 1.1471397951686368,
+      "learning_rate": 0.003,
+      "loss": 3.9685,
+      "step": 24378
+    },
+    {
+      "epoch": 0.24379,
+      "grad_norm": 1.2665475020333121,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 24379
+    },
+    {
+      "epoch": 0.2438,
+      "grad_norm": 1.2405058567798786,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 24380
+    },
+    {
+      "epoch": 0.24381,
+      "grad_norm": 1.2763989552852328,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 24381
+    },
+    {
+      "epoch": 0.24382,
+      "grad_norm": 1.2819422450417008,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 24382
+    },
+    {
+      "epoch": 0.24383,
+      "grad_norm": 1.4062546736109662,
+      "learning_rate": 0.003,
+      "loss": 3.9852,
+      "step": 24383
+    },
+    {
+      "epoch": 0.24384,
+      "grad_norm": 1.1927112034912102,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 24384
+    },
+    {
+      "epoch": 0.24385,
+      "grad_norm": 1.2721420137833226,
+      "learning_rate": 0.003,
+      "loss": 3.9851,
+      "step": 24385
+    },
+    {
+      "epoch": 0.24386,
+      "grad_norm": 1.2534050656995195,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 24386
+    },
+    {
+      "epoch": 0.24387,
+      "grad_norm": 1.3244435530928822,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 24387
+    },
+    {
+      "epoch": 0.24388,
+      "grad_norm": 1.2616847657069525,
+      "learning_rate": 0.003,
+      "loss": 3.9821,
+      "step": 24388
+    },
+    {
+      "epoch": 0.24389,
+      "grad_norm": 1.2271866068145514,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 24389
+    },
+    {
+      "epoch": 0.2439,
+      "grad_norm": 1.3865900981505386,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 24390
+    },
+    {
+      "epoch": 0.24391,
+      "grad_norm": 1.452410080119915,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 24391
+    },
+    {
+      "epoch": 0.24392,
+      "grad_norm": 1.1099791273002877,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 24392
+    },
+    {
+      "epoch": 0.24393,
+      "grad_norm": 1.395517868593229,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 24393
+    },
+    {
+      "epoch": 0.24394,
+      "grad_norm": 1.223442272491398,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 24394
+    },
+    {
+      "epoch": 0.24395,
+      "grad_norm": 1.5095400418472722,
+      "learning_rate": 0.003,
+      "loss": 3.9697,
+      "step": 24395
+    },
+    {
+      "epoch": 0.24396,
+      "grad_norm": 1.0740584995420714,
+      "learning_rate": 0.003,
+      "loss": 4.006,
+      "step": 24396
+    },
+    {
+      "epoch": 0.24397,
+      "grad_norm": 1.5712188433030259,
+      "learning_rate": 0.003,
+      "loss": 4.0375,
+      "step": 24397
+    },
+    {
+      "epoch": 0.24398,
+      "grad_norm": 0.9913594148643636,
+      "learning_rate": 0.003,
+      "loss": 3.9863,
+      "step": 24398
+    },
+    {
+      "epoch": 0.24399,
+      "grad_norm": 1.6322503106068742,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 24399
+    },
+    {
+      "epoch": 0.244,
+      "grad_norm": 1.1321171926136195,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 24400
+    },
+    {
+      "epoch": 0.24401,
+      "grad_norm": 1.3767302861060162,
+      "learning_rate": 0.003,
+      "loss": 3.9798,
+      "step": 24401
+    },
+    {
+      "epoch": 0.24402,
+      "grad_norm": 1.1879700957527368,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 24402
+    },
+    {
+      "epoch": 0.24403,
+      "grad_norm": 1.5901289481262721,
+      "learning_rate": 0.003,
+      "loss": 4.0355,
+      "step": 24403
+    },
+    {
+      "epoch": 0.24404,
+      "grad_norm": 1.2849556050127822,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 24404
+    },
+    {
+      "epoch": 0.24405,
+      "grad_norm": 1.3608707343300581,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 24405
+    },
+    {
+      "epoch": 0.24406,
+      "grad_norm": 1.3196568968819455,
+      "learning_rate": 0.003,
+      "loss": 4.0188,
+      "step": 24406
+    },
+    {
+      "epoch": 0.24407,
+      "grad_norm": 1.0128030447019938,
+      "learning_rate": 0.003,
+      "loss": 4.0061,
+      "step": 24407
+    },
+    {
+      "epoch": 0.24408,
+      "grad_norm": 1.298169699589074,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 24408
+    },
+    {
+      "epoch": 0.24409,
+      "grad_norm": 1.1529502331112569,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 24409
+    },
+    {
+      "epoch": 0.2441,
+      "grad_norm": 1.3561983998887073,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 24410
+    },
+    {
+      "epoch": 0.24411,
+      "grad_norm": 1.1545828102418174,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 24411
+    },
+    {
+      "epoch": 0.24412,
+      "grad_norm": 1.2603649346267525,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 24412
+    },
+    {
+      "epoch": 0.24413,
+      "grad_norm": 1.2729389148004777,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 24413
+    },
+    {
+      "epoch": 0.24414,
+      "grad_norm": 1.2361068807094853,
+      "learning_rate": 0.003,
+      "loss": 4.0277,
+      "step": 24414
+    },
+    {
+      "epoch": 0.24415,
+      "grad_norm": 1.2102621052864784,
+      "learning_rate": 0.003,
+      "loss": 3.9712,
+      "step": 24415
+    },
+    {
+      "epoch": 0.24416,
+      "grad_norm": 1.2536920599162082,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 24416
+    },
+    {
+      "epoch": 0.24417,
+      "grad_norm": 1.2354477919849793,
+      "learning_rate": 0.003,
+      "loss": 3.9877,
+      "step": 24417
+    },
+    {
+      "epoch": 0.24418,
+      "grad_norm": 1.1486667979070766,
+      "learning_rate": 0.003,
+      "loss": 3.9848,
+      "step": 24418
+    },
+    {
+      "epoch": 0.24419,
+      "grad_norm": 1.3142035597874264,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 24419
+    },
+    {
+      "epoch": 0.2442,
+      "grad_norm": 1.0881019481016025,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 24420
+    },
+    {
+      "epoch": 0.24421,
+      "grad_norm": 1.6285304259604259,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 24421
+    },
+    {
+      "epoch": 0.24422,
+      "grad_norm": 1.0945482006505143,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 24422
+    },
+    {
+      "epoch": 0.24423,
+      "grad_norm": 1.4386910922882614,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 24423
+    },
+    {
+      "epoch": 0.24424,
+      "grad_norm": 1.0749238964289267,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 24424
+    },
+    {
+      "epoch": 0.24425,
+      "grad_norm": 1.601371586044003,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 24425
+    },
+    {
+      "epoch": 0.24426,
+      "grad_norm": 1.264147204571013,
+      "learning_rate": 0.003,
+      "loss": 4.0203,
+      "step": 24426
+    },
+    {
+      "epoch": 0.24427,
+      "grad_norm": 1.4660590848347255,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 24427
+    },
+    {
+      "epoch": 0.24428,
+      "grad_norm": 1.361533111769222,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 24428
+    },
+    {
+      "epoch": 0.24429,
+      "grad_norm": 1.1054025979712803,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 24429
+    },
+    {
+      "epoch": 0.2443,
+      "grad_norm": 1.607089699855232,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 24430
+    },
+    {
+      "epoch": 0.24431,
+      "grad_norm": 1.0589506934600224,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 24431
+    },
+    {
+      "epoch": 0.24432,
+      "grad_norm": 1.4425507614502244,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 24432
+    },
+    {
+      "epoch": 0.24433,
+      "grad_norm": 1.2536526237620638,
+      "learning_rate": 0.003,
+      "loss": 4.0266,
+      "step": 24433
+    },
+    {
+      "epoch": 0.24434,
+      "grad_norm": 1.2799014871333905,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 24434
+    },
+    {
+      "epoch": 0.24435,
+      "grad_norm": 1.2087310362840358,
+      "learning_rate": 0.003,
+      "loss": 3.995,
+      "step": 24435
+    },
+    {
+      "epoch": 0.24436,
+      "grad_norm": 1.2589430404319766,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 24436
+    },
+    {
+      "epoch": 0.24437,
+      "grad_norm": 1.3020743241944703,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 24437
+    },
+    {
+      "epoch": 0.24438,
+      "grad_norm": 1.0350526192333778,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 24438
+    },
+    {
+      "epoch": 0.24439,
+      "grad_norm": 1.613769720221129,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 24439
+    },
+    {
+      "epoch": 0.2444,
+      "grad_norm": 0.9170389233384412,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 24440
+    },
+    {
+      "epoch": 0.24441,
+      "grad_norm": 1.3503951688466662,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 24441
+    },
+    {
+      "epoch": 0.24442,
+      "grad_norm": 1.1689935574508825,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 24442
+    },
+    {
+      "epoch": 0.24443,
+      "grad_norm": 1.4489234487660718,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 24443
+    },
+    {
+      "epoch": 0.24444,
+      "grad_norm": 1.0872102385875815,
+      "learning_rate": 0.003,
+      "loss": 3.9742,
+      "step": 24444
+    },
+    {
+      "epoch": 0.24445,
+      "grad_norm": 1.3098726064245774,
+      "learning_rate": 0.003,
+      "loss": 4.0341,
+      "step": 24445
+    },
+    {
+      "epoch": 0.24446,
+      "grad_norm": 1.338901525415141,
+      "learning_rate": 0.003,
+      "loss": 4.0158,
+      "step": 24446
+    },
+    {
+      "epoch": 0.24447,
+      "grad_norm": 1.034534928980268,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 24447
+    },
+    {
+      "epoch": 0.24448,
+      "grad_norm": 1.822390535620151,
+      "learning_rate": 0.003,
+      "loss": 3.9991,
+      "step": 24448
+    },
+    {
+      "epoch": 0.24449,
+      "grad_norm": 0.9124108929155023,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 24449
+    },
+    {
+      "epoch": 0.2445,
+      "grad_norm": 1.2408020598965728,
+      "learning_rate": 0.003,
+      "loss": 3.9788,
+      "step": 24450
+    },
+    {
+      "epoch": 0.24451,
+      "grad_norm": 1.2732858985349713,
+      "learning_rate": 0.003,
+      "loss": 4.0168,
+      "step": 24451
+    },
+    {
+      "epoch": 0.24452,
+      "grad_norm": 1.0834995761354618,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 24452
+    },
+    {
+      "epoch": 0.24453,
+      "grad_norm": 1.5144110650126403,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 24453
+    },
+    {
+      "epoch": 0.24454,
+      "grad_norm": 1.1521916240709267,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 24454
+    },
+    {
+      "epoch": 0.24455,
+      "grad_norm": 1.3092658707896614,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 24455
+    },
+    {
+      "epoch": 0.24456,
+      "grad_norm": 1.3860268411626766,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 24456
+    },
+    {
+      "epoch": 0.24457,
+      "grad_norm": 1.1689766898842917,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 24457
+    },
+    {
+      "epoch": 0.24458,
+      "grad_norm": 1.4772332410144584,
+      "learning_rate": 0.003,
+      "loss": 4.0299,
+      "step": 24458
+    },
+    {
+      "epoch": 0.24459,
+      "grad_norm": 1.2304619562599264,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 24459
+    },
+    {
+      "epoch": 0.2446,
+      "grad_norm": 1.309390676115892,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 24460
+    },
+    {
+      "epoch": 0.24461,
+      "grad_norm": 1.2113454880787824,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 24461
+    },
+    {
+      "epoch": 0.24462,
+      "grad_norm": 1.2904248793524917,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 24462
+    },
+    {
+      "epoch": 0.24463,
+      "grad_norm": 1.304548084444334,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 24463
+    },
+    {
+      "epoch": 0.24464,
+      "grad_norm": 1.1189969015118677,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 24464
+    },
+    {
+      "epoch": 0.24465,
+      "grad_norm": 1.389772974590791,
+      "learning_rate": 0.003,
+      "loss": 3.9959,
+      "step": 24465
+    },
+    {
+      "epoch": 0.24466,
+      "grad_norm": 1.210061588011052,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 24466
+    },
+    {
+      "epoch": 0.24467,
+      "grad_norm": 1.3100128507491784,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 24467
+    },
+    {
+      "epoch": 0.24468,
+      "grad_norm": 1.1941964184232559,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 24468
+    },
+    {
+      "epoch": 0.24469,
+      "grad_norm": 1.308705515543237,
+      "learning_rate": 0.003,
+      "loss": 4.0465,
+      "step": 24469
+    },
+    {
+      "epoch": 0.2447,
+      "grad_norm": 1.0195916220893464,
+      "learning_rate": 0.003,
+      "loss": 3.9782,
+      "step": 24470
+    },
+    {
+      "epoch": 0.24471,
+      "grad_norm": 1.6070817554026,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 24471
+    },
+    {
+      "epoch": 0.24472,
+      "grad_norm": 1.0093747519472442,
+      "learning_rate": 0.003,
+      "loss": 3.9884,
+      "step": 24472
+    },
+    {
+      "epoch": 0.24473,
+      "grad_norm": 1.69713409622631,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 24473
+    },
+    {
+      "epoch": 0.24474,
+      "grad_norm": 1.1770981194343089,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 24474
+    },
+    {
+      "epoch": 0.24475,
+      "grad_norm": 1.230236205206841,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 24475
+    },
+    {
+      "epoch": 0.24476,
+      "grad_norm": 1.3238783048504714,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 24476
+    },
+    {
+      "epoch": 0.24477,
+      "grad_norm": 1.7297844209362898,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 24477
+    },
+    {
+      "epoch": 0.24478,
+      "grad_norm": 1.176109623268336,
+      "learning_rate": 0.003,
+      "loss": 4.0126,
+      "step": 24478
+    },
+    {
+      "epoch": 0.24479,
+      "grad_norm": 1.373955092425568,
+      "learning_rate": 0.003,
+      "loss": 4.0109,
+      "step": 24479
+    },
+    {
+      "epoch": 0.2448,
+      "grad_norm": 1.124894114251988,
+      "learning_rate": 0.003,
+      "loss": 3.9781,
+      "step": 24480
+    },
+    {
+      "epoch": 0.24481,
+      "grad_norm": 1.2702195447978766,
+      "learning_rate": 0.003,
+      "loss": 3.9845,
+      "step": 24481
+    },
+    {
+      "epoch": 0.24482,
+      "grad_norm": 1.1609298837806605,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 24482
+    },
+    {
+      "epoch": 0.24483,
+      "grad_norm": 1.389432765093146,
+      "learning_rate": 0.003,
+      "loss": 3.9743,
+      "step": 24483
+    },
+    {
+      "epoch": 0.24484,
+      "grad_norm": 1.1555440693822656,
+      "learning_rate": 0.003,
+      "loss": 4.0209,
+      "step": 24484
+    },
+    {
+      "epoch": 0.24485,
+      "grad_norm": 1.362078798919951,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 24485
+    },
+    {
+      "epoch": 0.24486,
+      "grad_norm": 1.2181931020681083,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 24486
+    },
+    {
+      "epoch": 0.24487,
+      "grad_norm": 1.240411429671414,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 24487
+    },
+    {
+      "epoch": 0.24488,
+      "grad_norm": 1.181158430694127,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 24488
+    },
+    {
+      "epoch": 0.24489,
+      "grad_norm": 1.3770575576881237,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 24489
+    },
+    {
+      "epoch": 0.2449,
+      "grad_norm": 1.1912864239449947,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 24490
+    },
+    {
+      "epoch": 0.24491,
+      "grad_norm": 1.1782499554532953,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 24491
+    },
+    {
+      "epoch": 0.24492,
+      "grad_norm": 1.3124209281722807,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 24492
+    },
+    {
+      "epoch": 0.24493,
+      "grad_norm": 1.2408224119289284,
+      "learning_rate": 0.003,
+      "loss": 3.9737,
+      "step": 24493
+    },
+    {
+      "epoch": 0.24494,
+      "grad_norm": 1.4268146314175043,
+      "learning_rate": 0.003,
+      "loss": 3.9989,
+      "step": 24494
+    },
+    {
+      "epoch": 0.24495,
+      "grad_norm": 1.1266870655034145,
+      "learning_rate": 0.003,
+      "loss": 4.0093,
+      "step": 24495
+    },
+    {
+      "epoch": 0.24496,
+      "grad_norm": 1.3905254055904925,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 24496
+    },
+    {
+      "epoch": 0.24497,
+      "grad_norm": 1.2704920317502815,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 24497
+    },
+    {
+      "epoch": 0.24498,
+      "grad_norm": 1.1600406672321961,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 24498
+    },
+    {
+      "epoch": 0.24499,
+      "grad_norm": 1.2503352133157692,
+      "learning_rate": 0.003,
+      "loss": 3.9721,
+      "step": 24499
+    },
+    {
+      "epoch": 0.245,
+      "grad_norm": 1.369644885154186,
+      "learning_rate": 0.003,
+      "loss": 3.992,
+      "step": 24500
+    },
+    {
+      "epoch": 0.24501,
+      "grad_norm": 1.0735186559545227,
+      "learning_rate": 0.003,
+      "loss": 3.9841,
+      "step": 24501
+    },
+    {
+      "epoch": 0.24502,
+      "grad_norm": 1.3026273523704313,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 24502
+    },
+    {
+      "epoch": 0.24503,
+      "grad_norm": 1.423704251697948,
+      "learning_rate": 0.003,
+      "loss": 3.9872,
+      "step": 24503
+    },
+    {
+      "epoch": 0.24504,
+      "grad_norm": 1.2419070689989116,
+      "learning_rate": 0.003,
+      "loss": 3.9895,
+      "step": 24504
+    },
+    {
+      "epoch": 0.24505,
+      "grad_norm": 1.285348758082936,
+      "learning_rate": 0.003,
+      "loss": 3.9785,
+      "step": 24505
+    },
+    {
+      "epoch": 0.24506,
+      "grad_norm": 1.4129311142429941,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 24506
+    },
+    {
+      "epoch": 0.24507,
+      "grad_norm": 1.3375240857028547,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 24507
+    },
+    {
+      "epoch": 0.24508,
+      "grad_norm": 1.1157070409357956,
+      "learning_rate": 0.003,
+      "loss": 3.971,
+      "step": 24508
+    },
+    {
+      "epoch": 0.24509,
+      "grad_norm": 1.2705513672455138,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 24509
+    },
+    {
+      "epoch": 0.2451,
+      "grad_norm": 1.0732006513587,
+      "learning_rate": 0.003,
+      "loss": 3.9828,
+      "step": 24510
+    },
+    {
+      "epoch": 0.24511,
+      "grad_norm": 1.4601980972998414,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 24511
+    },
+    {
+      "epoch": 0.24512,
+      "grad_norm": 1.0790293292743436,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 24512
+    },
+    {
+      "epoch": 0.24513,
+      "grad_norm": 1.6412413349277297,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 24513
+    },
+    {
+      "epoch": 0.24514,
+      "grad_norm": 1.314448730278494,
+      "learning_rate": 0.003,
+      "loss": 4.0416,
+      "step": 24514
+    },
+    {
+      "epoch": 0.24515,
+      "grad_norm": 1.3512280096730391,
+      "learning_rate": 0.003,
+      "loss": 4.0044,
+      "step": 24515
+    },
+    {
+      "epoch": 0.24516,
+      "grad_norm": 1.3611627149487544,
+      "learning_rate": 0.003,
+      "loss": 3.9949,
+      "step": 24516
+    },
+    {
+      "epoch": 0.24517,
+      "grad_norm": 1.2213263879840819,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 24517
+    },
+    {
+      "epoch": 0.24518,
+      "grad_norm": 1.1371680851822656,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 24518
+    },
+    {
+      "epoch": 0.24519,
+      "grad_norm": 1.2458782152889503,
+      "learning_rate": 0.003,
+      "loss": 3.9918,
+      "step": 24519
+    },
+    {
+      "epoch": 0.2452,
+      "grad_norm": 1.4081830129503665,
+      "learning_rate": 0.003,
+      "loss": 4.01,
+      "step": 24520
+    },
+    {
+      "epoch": 0.24521,
+      "grad_norm": 1.0482776221555647,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 24521
+    },
+    {
+      "epoch": 0.24522,
+      "grad_norm": 1.197475730869575,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 24522
+    },
+    {
+      "epoch": 0.24523,
+      "grad_norm": 1.1650905666249265,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 24523
+    },
+    {
+      "epoch": 0.24524,
+      "grad_norm": 1.3532240837994298,
+      "learning_rate": 0.003,
+      "loss": 4.0096,
+      "step": 24524
+    },
+    {
+      "epoch": 0.24525,
+      "grad_norm": 1.2558574149030357,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 24525
+    },
+    {
+      "epoch": 0.24526,
+      "grad_norm": 1.6444831127005999,
+      "learning_rate": 0.003,
+      "loss": 3.9933,
+      "step": 24526
+    },
+    {
+      "epoch": 0.24527,
+      "grad_norm": 1.133678187830215,
+      "learning_rate": 0.003,
+      "loss": 4.0264,
+      "step": 24527
+    },
+    {
+      "epoch": 0.24528,
+      "grad_norm": 1.4383648726986396,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 24528
+    },
+    {
+      "epoch": 0.24529,
+      "grad_norm": 1.2718814167572745,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 24529
+    },
+    {
+      "epoch": 0.2453,
+      "grad_norm": 1.2477328920504283,
+      "learning_rate": 0.003,
+      "loss": 3.9879,
+      "step": 24530
+    },
+    {
+      "epoch": 0.24531,
+      "grad_norm": 1.408256452920792,
+      "learning_rate": 0.003,
+      "loss": 3.9671,
+      "step": 24531
+    },
+    {
+      "epoch": 0.24532,
+      "grad_norm": 1.1492057031083291,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 24532
+    },
+    {
+      "epoch": 0.24533,
+      "grad_norm": 1.369057819754174,
+      "learning_rate": 0.003,
+      "loss": 3.9799,
+      "step": 24533
+    },
+    {
+      "epoch": 0.24534,
+      "grad_norm": 1.401522661203297,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 24534
+    },
+    {
+      "epoch": 0.24535,
+      "grad_norm": 1.1862726772169272,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 24535
+    },
+    {
+      "epoch": 0.24536,
+      "grad_norm": 1.2185082581522924,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 24536
+    },
+    {
+      "epoch": 0.24537,
+      "grad_norm": 1.219255272832426,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 24537
+    },
+    {
+      "epoch": 0.24538,
+      "grad_norm": 1.5179490647095155,
+      "learning_rate": 0.003,
+      "loss": 3.9616,
+      "step": 24538
+    },
+    {
+      "epoch": 0.24539,
+      "grad_norm": 1.163424682689765,
+      "learning_rate": 0.003,
+      "loss": 3.9896,
+      "step": 24539
+    },
+    {
+      "epoch": 0.2454,
+      "grad_norm": 1.2817318303185041,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 24540
+    },
+    {
+      "epoch": 0.24541,
+      "grad_norm": 0.9420540565277724,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 24541
+    },
+    {
+      "epoch": 0.24542,
+      "grad_norm": 1.4018311995444985,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 24542
+    },
+    {
+      "epoch": 0.24543,
+      "grad_norm": 1.2580613173194635,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 24543
+    },
+    {
+      "epoch": 0.24544,
+      "grad_norm": 1.437851300333455,
+      "learning_rate": 0.003,
+      "loss": 4.0323,
+      "step": 24544
+    },
+    {
+      "epoch": 0.24545,
+      "grad_norm": 1.2114115888413228,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 24545
+    },
+    {
+      "epoch": 0.24546,
+      "grad_norm": 1.2623818232419044,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 24546
+    },
+    {
+      "epoch": 0.24547,
+      "grad_norm": 1.4984936596595229,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 24547
+    },
+    {
+      "epoch": 0.24548,
+      "grad_norm": 1.0999440684068809,
+      "learning_rate": 0.003,
+      "loss": 4.0024,
+      "step": 24548
+    },
+    {
+      "epoch": 0.24549,
+      "grad_norm": 1.6448288005632892,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 24549
+    },
+    {
+      "epoch": 0.2455,
+      "grad_norm": 0.9018576442833915,
+      "learning_rate": 0.003,
+      "loss": 3.9855,
+      "step": 24550
+    },
+    {
+      "epoch": 0.24551,
+      "grad_norm": 1.2194648901281933,
+      "learning_rate": 0.003,
+      "loss": 4.0069,
+      "step": 24551
+    },
+    {
+      "epoch": 0.24552,
+      "grad_norm": 1.1394659303795138,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 24552
+    },
+    {
+      "epoch": 0.24553,
+      "grad_norm": 1.3419129356567934,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 24553
+    },
+    {
+      "epoch": 0.24554,
+      "grad_norm": 1.3201990573418674,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 24554
+    },
+    {
+      "epoch": 0.24555,
+      "grad_norm": 1.2580154092506968,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 24555
+    },
+    {
+      "epoch": 0.24556,
+      "grad_norm": 1.2812275573351946,
+      "learning_rate": 0.003,
+      "loss": 4.0016,
+      "step": 24556
+    },
+    {
+      "epoch": 0.24557,
+      "grad_norm": 1.2925600126482795,
+      "learning_rate": 0.003,
+      "loss": 3.9683,
+      "step": 24557
+    },
+    {
+      "epoch": 0.24558,
+      "grad_norm": 1.487371113976635,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 24558
+    },
+    {
+      "epoch": 0.24559,
+      "grad_norm": 1.1025350485512706,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 24559
+    },
+    {
+      "epoch": 0.2456,
+      "grad_norm": 1.3954429475575116,
+      "learning_rate": 0.003,
+      "loss": 4.0426,
+      "step": 24560
+    },
+    {
+      "epoch": 0.24561,
+      "grad_norm": 1.0460054064259876,
+      "learning_rate": 0.003,
+      "loss": 3.9734,
+      "step": 24561
+    },
+    {
+      "epoch": 0.24562,
+      "grad_norm": 1.3645304422715374,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 24562
+    },
+    {
+      "epoch": 0.24563,
+      "grad_norm": 1.061585556252094,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 24563
+    },
+    {
+      "epoch": 0.24564,
+      "grad_norm": 1.184292909063992,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 24564
+    },
+    {
+      "epoch": 0.24565,
+      "grad_norm": 1.125124395888999,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 24565
+    },
+    {
+      "epoch": 0.24566,
+      "grad_norm": 1.4158312503414552,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 24566
+    },
+    {
+      "epoch": 0.24567,
+      "grad_norm": 1.346947364425602,
+      "learning_rate": 0.003,
+      "loss": 4.0118,
+      "step": 24567
+    },
+    {
+      "epoch": 0.24568,
+      "grad_norm": 1.2229252099783257,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 24568
+    },
+    {
+      "epoch": 0.24569,
+      "grad_norm": 1.4765092301188953,
+      "learning_rate": 0.003,
+      "loss": 3.9761,
+      "step": 24569
+    },
+    {
+      "epoch": 0.2457,
+      "grad_norm": 1.2310452922461506,
+      "learning_rate": 0.003,
+      "loss": 4.0128,
+      "step": 24570
+    },
+    {
+      "epoch": 0.24571,
+      "grad_norm": 1.364300757269087,
+      "learning_rate": 0.003,
+      "loss": 4.029,
+      "step": 24571
+    },
+    {
+      "epoch": 0.24572,
+      "grad_norm": 1.1422795307189637,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 24572
+    },
+    {
+      "epoch": 0.24573,
+      "grad_norm": 1.3796235722620354,
+      "learning_rate": 0.003,
+      "loss": 4.0071,
+      "step": 24573
+    },
+    {
+      "epoch": 0.24574,
+      "grad_norm": 1.0977449625933147,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 24574
+    },
+    {
+      "epoch": 0.24575,
+      "grad_norm": 1.3240354983035647,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 24575
+    },
+    {
+      "epoch": 0.24576,
+      "grad_norm": 1.0579597764118023,
+      "learning_rate": 0.003,
+      "loss": 3.9947,
+      "step": 24576
+    },
+    {
+      "epoch": 0.24577,
+      "grad_norm": 1.3234379744052676,
+      "learning_rate": 0.003,
+      "loss": 4.016,
+      "step": 24577
+    },
+    {
+      "epoch": 0.24578,
+      "grad_norm": 1.1933931453837714,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 24578
+    },
+    {
+      "epoch": 0.24579,
+      "grad_norm": 1.2636363163217763,
+      "learning_rate": 0.003,
+      "loss": 3.9977,
+      "step": 24579
+    },
+    {
+      "epoch": 0.2458,
+      "grad_norm": 1.2662381245714782,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 24580
+    },
+    {
+      "epoch": 0.24581,
+      "grad_norm": 1.1194897724760686,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 24581
+    },
+    {
+      "epoch": 0.24582,
+      "grad_norm": 1.4762857320179332,
+      "learning_rate": 0.003,
+      "loss": 4.0512,
+      "step": 24582
+    },
+    {
+      "epoch": 0.24583,
+      "grad_norm": 1.298757407592655,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 24583
+    },
+    {
+      "epoch": 0.24584,
+      "grad_norm": 1.5973441344607289,
+      "learning_rate": 0.003,
+      "loss": 4.03,
+      "step": 24584
+    },
+    {
+      "epoch": 0.24585,
+      "grad_norm": 1.1223455472749755,
+      "learning_rate": 0.003,
+      "loss": 4.0361,
+      "step": 24585
+    },
+    {
+      "epoch": 0.24586,
+      "grad_norm": 1.1470665467696848,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 24586
+    },
+    {
+      "epoch": 0.24587,
+      "grad_norm": 1.412875845681809,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 24587
+    },
+    {
+      "epoch": 0.24588,
+      "grad_norm": 1.1770124407006441,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 24588
+    },
+    {
+      "epoch": 0.24589,
+      "grad_norm": 1.2112001023809325,
+      "learning_rate": 0.003,
+      "loss": 3.971,
+      "step": 24589
+    },
+    {
+      "epoch": 0.2459,
+      "grad_norm": 1.1961615396078393,
+      "learning_rate": 0.003,
+      "loss": 4.007,
+      "step": 24590
+    },
+    {
+      "epoch": 0.24591,
+      "grad_norm": 1.2519526500479183,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 24591
+    },
+    {
+      "epoch": 0.24592,
+      "grad_norm": 1.4227788972555007,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 24592
+    },
+    {
+      "epoch": 0.24593,
+      "grad_norm": 1.2738424858524824,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 24593
+    },
+    {
+      "epoch": 0.24594,
+      "grad_norm": 1.2670364087975747,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 24594
+    },
+    {
+      "epoch": 0.24595,
+      "grad_norm": 1.3074508756614147,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 24595
+    },
+    {
+      "epoch": 0.24596,
+      "grad_norm": 1.215218656124111,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 24596
+    },
+    {
+      "epoch": 0.24597,
+      "grad_norm": 1.478268394064855,
+      "learning_rate": 0.003,
+      "loss": 4.0356,
+      "step": 24597
+    },
+    {
+      "epoch": 0.24598,
+      "grad_norm": 1.2231255134123205,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 24598
+    },
+    {
+      "epoch": 0.24599,
+      "grad_norm": 1.421576465437636,
+      "learning_rate": 0.003,
+      "loss": 3.9766,
+      "step": 24599
+    },
+    {
+      "epoch": 0.246,
+      "grad_norm": 1.3500851628892074,
+      "learning_rate": 0.003,
+      "loss": 3.9658,
+      "step": 24600
+    },
+    {
+      "epoch": 0.24601,
+      "grad_norm": 1.4312364743226715,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 24601
+    },
+    {
+      "epoch": 0.24602,
+      "grad_norm": 1.2356717794140983,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 24602
+    },
+    {
+      "epoch": 0.24603,
+      "grad_norm": 1.1336612457017279,
+      "learning_rate": 0.003,
+      "loss": 3.9704,
+      "step": 24603
+    },
+    {
+      "epoch": 0.24604,
+      "grad_norm": 1.2941876663492768,
+      "learning_rate": 0.003,
+      "loss": 3.9905,
+      "step": 24604
+    },
+    {
+      "epoch": 0.24605,
+      "grad_norm": 1.244908761009941,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 24605
+    },
+    {
+      "epoch": 0.24606,
+      "grad_norm": 1.268072967591686,
+      "learning_rate": 0.003,
+      "loss": 3.9757,
+      "step": 24606
+    },
+    {
+      "epoch": 0.24607,
+      "grad_norm": 1.3442752716281736,
+      "learning_rate": 0.003,
+      "loss": 4.0205,
+      "step": 24607
+    },
+    {
+      "epoch": 0.24608,
+      "grad_norm": 1.1324212796888171,
+      "learning_rate": 0.003,
+      "loss": 4.0074,
+      "step": 24608
+    },
+    {
+      "epoch": 0.24609,
+      "grad_norm": 1.227788441114899,
+      "learning_rate": 0.003,
+      "loss": 3.9814,
+      "step": 24609
+    },
+    {
+      "epoch": 0.2461,
+      "grad_norm": 1.3017630716193305,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 24610
+    },
+    {
+      "epoch": 0.24611,
+      "grad_norm": 1.3561549928484509,
+      "learning_rate": 0.003,
+      "loss": 3.9722,
+      "step": 24611
+    },
+    {
+      "epoch": 0.24612,
+      "grad_norm": 1.560212527903966,
+      "learning_rate": 0.003,
+      "loss": 3.9767,
+      "step": 24612
+    },
+    {
+      "epoch": 0.24613,
+      "grad_norm": 1.2236228667992388,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 24613
+    },
+    {
+      "epoch": 0.24614,
+      "grad_norm": 1.249680377062008,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 24614
+    },
+    {
+      "epoch": 0.24615,
+      "grad_norm": 1.2857542939928213,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 24615
+    },
+    {
+      "epoch": 0.24616,
+      "grad_norm": 1.2304845537166347,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 24616
+    },
+    {
+      "epoch": 0.24617,
+      "grad_norm": 1.2694328843493385,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 24617
+    },
+    {
+      "epoch": 0.24618,
+      "grad_norm": 1.4108939244196412,
+      "learning_rate": 0.003,
+      "loss": 4.0083,
+      "step": 24618
+    },
+    {
+      "epoch": 0.24619,
+      "grad_norm": 1.2315563654455357,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 24619
+    },
+    {
+      "epoch": 0.2462,
+      "grad_norm": 1.5787837354955057,
+      "learning_rate": 0.003,
+      "loss": 4.0169,
+      "step": 24620
+    },
+    {
+      "epoch": 0.24621,
+      "grad_norm": 1.1914516991164639,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 24621
+    },
+    {
+      "epoch": 0.24622,
+      "grad_norm": 1.2351897390461064,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 24622
+    },
+    {
+      "epoch": 0.24623,
+      "grad_norm": 1.4150402269769844,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 24623
+    },
+    {
+      "epoch": 0.24624,
+      "grad_norm": 1.2510689537351796,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 24624
+    },
+    {
+      "epoch": 0.24625,
+      "grad_norm": 1.3422137124531885,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 24625
+    },
+    {
+      "epoch": 0.24626,
+      "grad_norm": 1.411009099589323,
+      "learning_rate": 0.003,
+      "loss": 3.9715,
+      "step": 24626
+    },
+    {
+      "epoch": 0.24627,
+      "grad_norm": 1.2324799071058952,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 24627
+    },
+    {
+      "epoch": 0.24628,
+      "grad_norm": 1.346467316647814,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 24628
+    },
+    {
+      "epoch": 0.24629,
+      "grad_norm": 1.215171737310081,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 24629
+    },
+    {
+      "epoch": 0.2463,
+      "grad_norm": 1.4533512593478701,
+      "learning_rate": 0.003,
+      "loss": 3.9748,
+      "step": 24630
+    },
+    {
+      "epoch": 0.24631,
+      "grad_norm": 1.0023167948491618,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 24631
+    },
+    {
+      "epoch": 0.24632,
+      "grad_norm": 1.2723511155493927,
+      "learning_rate": 0.003,
+      "loss": 3.9706,
+      "step": 24632
+    },
+    {
+      "epoch": 0.24633,
+      "grad_norm": 1.0529819141510273,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 24633
+    },
+    {
+      "epoch": 0.24634,
+      "grad_norm": 1.4957611325375222,
+      "learning_rate": 0.003,
+      "loss": 3.9746,
+      "step": 24634
+    },
+    {
+      "epoch": 0.24635,
+      "grad_norm": 0.939000968052872,
+      "learning_rate": 0.003,
+      "loss": 4.0111,
+      "step": 24635
+    },
+    {
+      "epoch": 0.24636,
+      "grad_norm": 1.3301545619991126,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 24636
+    },
+    {
+      "epoch": 0.24637,
+      "grad_norm": 1.3104883390547468,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 24637
+    },
+    {
+      "epoch": 0.24638,
+      "grad_norm": 1.4847929035333716,
+      "learning_rate": 0.003,
+      "loss": 3.9929,
+      "step": 24638
+    },
+    {
+      "epoch": 0.24639,
+      "grad_norm": 1.2236463408822666,
+      "learning_rate": 0.003,
+      "loss": 3.9808,
+      "step": 24639
+    },
+    {
+      "epoch": 0.2464,
+      "grad_norm": 1.252220237899825,
+      "learning_rate": 0.003,
+      "loss": 3.979,
+      "step": 24640
+    },
+    {
+      "epoch": 0.24641,
+      "grad_norm": 1.2406049485693091,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 24641
+    },
+    {
+      "epoch": 0.24642,
+      "grad_norm": 1.1164121013398498,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 24642
+    },
+    {
+      "epoch": 0.24643,
+      "grad_norm": 1.3662341308430657,
+      "learning_rate": 0.003,
+      "loss": 4.0346,
+      "step": 24643
+    },
+    {
+      "epoch": 0.24644,
+      "grad_norm": 0.9813241759048587,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 24644
+    },
+    {
+      "epoch": 0.24645,
+      "grad_norm": 1.5167245491466705,
+      "learning_rate": 0.003,
+      "loss": 4.0098,
+      "step": 24645
+    },
+    {
+      "epoch": 0.24646,
+      "grad_norm": 1.1735627610234423,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 24646
+    },
+    {
+      "epoch": 0.24647,
+      "grad_norm": 1.318700530122228,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 24647
+    },
+    {
+      "epoch": 0.24648,
+      "grad_norm": 1.2440487818748398,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 24648
+    },
+    {
+      "epoch": 0.24649,
+      "grad_norm": 1.574597883591368,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 24649
+    },
+    {
+      "epoch": 0.2465,
+      "grad_norm": 1.1903268396552567,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 24650
+    },
+    {
+      "epoch": 0.24651,
+      "grad_norm": 1.543438089793802,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 24651
+    },
+    {
+      "epoch": 0.24652,
+      "grad_norm": 1.4121029372690372,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 24652
+    },
+    {
+      "epoch": 0.24653,
+      "grad_norm": 1.288286124340375,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 24653
+    },
+    {
+      "epoch": 0.24654,
+      "grad_norm": 1.034673341108081,
+      "learning_rate": 0.003,
+      "loss": 4.0238,
+      "step": 24654
+    },
+    {
+      "epoch": 0.24655,
+      "grad_norm": 1.5053082760957677,
+      "learning_rate": 0.003,
+      "loss": 4.046,
+      "step": 24655
+    },
+    {
+      "epoch": 0.24656,
+      "grad_norm": 1.1412355825018874,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 24656
+    },
+    {
+      "epoch": 0.24657,
+      "grad_norm": 1.370928221612757,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 24657
+    },
+    {
+      "epoch": 0.24658,
+      "grad_norm": 1.117289832037451,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 24658
+    },
+    {
+      "epoch": 0.24659,
+      "grad_norm": 1.3381013297036435,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 24659
+    },
+    {
+      "epoch": 0.2466,
+      "grad_norm": 1.111318051470423,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 24660
+    },
+    {
+      "epoch": 0.24661,
+      "grad_norm": 1.6080263304074163,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 24661
+    },
+    {
+      "epoch": 0.24662,
+      "grad_norm": 1.129315054342282,
+      "learning_rate": 0.003,
+      "loss": 3.973,
+      "step": 24662
+    },
+    {
+      "epoch": 0.24663,
+      "grad_norm": 1.3811289546712864,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 24663
+    },
+    {
+      "epoch": 0.24664,
+      "grad_norm": 1.307843823927585,
+      "learning_rate": 0.003,
+      "loss": 4.002,
+      "step": 24664
+    },
+    {
+      "epoch": 0.24665,
+      "grad_norm": 1.0816513454227719,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 24665
+    },
+    {
+      "epoch": 0.24666,
+      "grad_norm": 1.4668083650688304,
+      "learning_rate": 0.003,
+      "loss": 4.0034,
+      "step": 24666
+    },
+    {
+      "epoch": 0.24667,
+      "grad_norm": 1.148674567251029,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 24667
+    },
+    {
+      "epoch": 0.24668,
+      "grad_norm": 1.379267112917855,
+      "learning_rate": 0.003,
+      "loss": 4.0081,
+      "step": 24668
+    },
+    {
+      "epoch": 0.24669,
+      "grad_norm": 1.2382739544890313,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 24669
+    },
+    {
+      "epoch": 0.2467,
+      "grad_norm": 1.2879716938350132,
+      "learning_rate": 0.003,
+      "loss": 3.9824,
+      "step": 24670
+    },
+    {
+      "epoch": 0.24671,
+      "grad_norm": 1.3697348966662908,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 24671
+    },
+    {
+      "epoch": 0.24672,
+      "grad_norm": 0.9681121932216001,
+      "learning_rate": 0.003,
+      "loss": 4.0354,
+      "step": 24672
+    },
+    {
+      "epoch": 0.24673,
+      "grad_norm": 1.3648075189460367,
+      "learning_rate": 0.003,
+      "loss": 3.9923,
+      "step": 24673
+    },
+    {
+      "epoch": 0.24674,
+      "grad_norm": 1.1897306529745113,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 24674
+    },
+    {
+      "epoch": 0.24675,
+      "grad_norm": 1.3939470498291302,
+      "learning_rate": 0.003,
+      "loss": 4.0174,
+      "step": 24675
+    },
+    {
+      "epoch": 0.24676,
+      "grad_norm": 1.388461956446603,
+      "learning_rate": 0.003,
+      "loss": 4.0202,
+      "step": 24676
+    },
+    {
+      "epoch": 0.24677,
+      "grad_norm": 1.2110177841607612,
+      "learning_rate": 0.003,
+      "loss": 4.0248,
+      "step": 24677
+    },
+    {
+      "epoch": 0.24678,
+      "grad_norm": 1.3740443016258956,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 24678
+    },
+    {
+      "epoch": 0.24679,
+      "grad_norm": 1.1637022029701953,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 24679
+    },
+    {
+      "epoch": 0.2468,
+      "grad_norm": 1.2642799061699437,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 24680
+    },
+    {
+      "epoch": 0.24681,
+      "grad_norm": 1.2130351144366158,
+      "learning_rate": 0.003,
+      "loss": 3.9892,
+      "step": 24681
+    },
+    {
+      "epoch": 0.24682,
+      "grad_norm": 1.3904717767109291,
+      "learning_rate": 0.003,
+      "loss": 3.9819,
+      "step": 24682
+    },
+    {
+      "epoch": 0.24683,
+      "grad_norm": 1.157820231661598,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 24683
+    },
+    {
+      "epoch": 0.24684,
+      "grad_norm": 1.3822265669542249,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 24684
+    },
+    {
+      "epoch": 0.24685,
+      "grad_norm": 1.3087600251322966,
+      "learning_rate": 0.003,
+      "loss": 3.9768,
+      "step": 24685
+    },
+    {
+      "epoch": 0.24686,
+      "grad_norm": 1.0570031173666607,
+      "learning_rate": 0.003,
+      "loss": 4.003,
+      "step": 24686
+    },
+    {
+      "epoch": 0.24687,
+      "grad_norm": 1.415435006666779,
+      "learning_rate": 0.003,
+      "loss": 4.0175,
+      "step": 24687
+    },
+    {
+      "epoch": 0.24688,
+      "grad_norm": 1.0731590666948478,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 24688
+    },
+    {
+      "epoch": 0.24689,
+      "grad_norm": 1.5939051492914877,
+      "learning_rate": 0.003,
+      "loss": 3.9784,
+      "step": 24689
+    },
+    {
+      "epoch": 0.2469,
+      "grad_norm": 1.113414783922738,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 24690
+    },
+    {
+      "epoch": 0.24691,
+      "grad_norm": 1.307529866877825,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 24691
+    },
+    {
+      "epoch": 0.24692,
+      "grad_norm": 1.4741871564841347,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 24692
+    },
+    {
+      "epoch": 0.24693,
+      "grad_norm": 1.0641330877511335,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 24693
+    },
+    {
+      "epoch": 0.24694,
+      "grad_norm": 1.3335242967290242,
+      "learning_rate": 0.003,
+      "loss": 3.9883,
+      "step": 24694
+    },
+    {
+      "epoch": 0.24695,
+      "grad_norm": 1.3180727379872395,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 24695
+    },
+    {
+      "epoch": 0.24696,
+      "grad_norm": 1.1923775652094024,
+      "learning_rate": 0.003,
+      "loss": 3.9874,
+      "step": 24696
+    },
+    {
+      "epoch": 0.24697,
+      "grad_norm": 1.2253150337792198,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 24697
+    },
+    {
+      "epoch": 0.24698,
+      "grad_norm": 1.4961497604511478,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 24698
+    },
+    {
+      "epoch": 0.24699,
+      "grad_norm": 1.394396202463164,
+      "learning_rate": 0.003,
+      "loss": 4.0214,
+      "step": 24699
+    },
+    {
+      "epoch": 0.247,
+      "grad_norm": 1.385844747272234,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 24700
+    },
+    {
+      "epoch": 0.24701,
+      "grad_norm": 1.1348510934581726,
+      "learning_rate": 0.003,
+      "loss": 3.975,
+      "step": 24701
+    },
+    {
+      "epoch": 0.24702,
+      "grad_norm": 1.2552381088560451,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 24702
+    },
+    {
+      "epoch": 0.24703,
+      "grad_norm": 1.0837319100493805,
+      "learning_rate": 0.003,
+      "loss": 3.9817,
+      "step": 24703
+    },
+    {
+      "epoch": 0.24704,
+      "grad_norm": 1.430204713237622,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 24704
+    },
+    {
+      "epoch": 0.24705,
+      "grad_norm": 1.0526726030411293,
+      "learning_rate": 0.003,
+      "loss": 4.013,
+      "step": 24705
+    },
+    {
+      "epoch": 0.24706,
+      "grad_norm": 1.5449131898390647,
+      "learning_rate": 0.003,
+      "loss": 4.0211,
+      "step": 24706
+    },
+    {
+      "epoch": 0.24707,
+      "grad_norm": 1.2288483978475593,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 24707
+    },
+    {
+      "epoch": 0.24708,
+      "grad_norm": 1.3457463107015135,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 24708
+    },
+    {
+      "epoch": 0.24709,
+      "grad_norm": 1.258543961492084,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 24709
+    },
+    {
+      "epoch": 0.2471,
+      "grad_norm": 1.124878261479507,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 24710
+    },
+    {
+      "epoch": 0.24711,
+      "grad_norm": 1.2861756594307459,
+      "learning_rate": 0.003,
+      "loss": 3.9815,
+      "step": 24711
+    },
+    {
+      "epoch": 0.24712,
+      "grad_norm": 1.3107277992789066,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 24712
+    },
+    {
+      "epoch": 0.24713,
+      "grad_norm": 1.2094004363372723,
+      "learning_rate": 0.003,
+      "loss": 3.9846,
+      "step": 24713
+    },
+    {
+      "epoch": 0.24714,
+      "grad_norm": 1.4353340142787379,
+      "learning_rate": 0.003,
+      "loss": 3.9841,
+      "step": 24714
+    },
+    {
+      "epoch": 0.24715,
+      "grad_norm": 1.0824603406089681,
+      "learning_rate": 0.003,
+      "loss": 4.0006,
+      "step": 24715
+    },
+    {
+      "epoch": 0.24716,
+      "grad_norm": 1.2514238730842917,
+      "learning_rate": 0.003,
+      "loss": 3.9783,
+      "step": 24716
+    },
+    {
+      "epoch": 0.24717,
+      "grad_norm": 1.0970669889939322,
+      "learning_rate": 0.003,
+      "loss": 3.9816,
+      "step": 24717
+    },
+    {
+      "epoch": 0.24718,
+      "grad_norm": 1.6964929773739108,
+      "learning_rate": 0.003,
+      "loss": 4.025,
+      "step": 24718
+    },
+    {
+      "epoch": 0.24719,
+      "grad_norm": 1.0139117811931586,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 24719
+    },
+    {
+      "epoch": 0.2472,
+      "grad_norm": 1.5055898627298747,
+      "learning_rate": 0.003,
+      "loss": 3.9999,
+      "step": 24720
+    },
+    {
+      "epoch": 0.24721,
+      "grad_norm": 1.326926218751792,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 24721
+    },
+    {
+      "epoch": 0.24722,
+      "grad_norm": 1.2718031908737437,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 24722
+    },
+    {
+      "epoch": 0.24723,
+      "grad_norm": 1.3474357058127395,
+      "learning_rate": 0.003,
+      "loss": 3.9757,
+      "step": 24723
+    },
+    {
+      "epoch": 0.24724,
+      "grad_norm": 1.2334546859459838,
+      "learning_rate": 0.003,
+      "loss": 4.0138,
+      "step": 24724
+    },
+    {
+      "epoch": 0.24725,
+      "grad_norm": 1.2738675099412546,
+      "learning_rate": 0.003,
+      "loss": 3.9868,
+      "step": 24725
+    },
+    {
+      "epoch": 0.24726,
+      "grad_norm": 1.2398545146372983,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 24726
+    },
+    {
+      "epoch": 0.24727,
+      "grad_norm": 1.2581645339660752,
+      "learning_rate": 0.003,
+      "loss": 3.9824,
+      "step": 24727
+    },
+    {
+      "epoch": 0.24728,
+      "grad_norm": 1.1667037085644867,
+      "learning_rate": 0.003,
+      "loss": 3.9724,
+      "step": 24728
+    },
+    {
+      "epoch": 0.24729,
+      "grad_norm": 1.1483772299098491,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 24729
+    },
+    {
+      "epoch": 0.2473,
+      "grad_norm": 1.5958945826100563,
+      "learning_rate": 0.003,
+      "loss": 3.9773,
+      "step": 24730
+    },
+    {
+      "epoch": 0.24731,
+      "grad_norm": 0.9956806217326519,
+      "learning_rate": 0.003,
+      "loss": 4.022,
+      "step": 24731
+    },
+    {
+      "epoch": 0.24732,
+      "grad_norm": 1.3054445322130388,
+      "learning_rate": 0.003,
+      "loss": 3.9986,
+      "step": 24732
+    },
+    {
+      "epoch": 0.24733,
+      "grad_norm": 1.3963704821580019,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 24733
+    },
+    {
+      "epoch": 0.24734,
+      "grad_norm": 1.0093120976472445,
+      "learning_rate": 0.003,
+      "loss": 3.9609,
+      "step": 24734
+    },
+    {
+      "epoch": 0.24735,
+      "grad_norm": 1.334384965340662,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 24735
+    },
+    {
+      "epoch": 0.24736,
+      "grad_norm": 1.2636116540425932,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 24736
+    },
+    {
+      "epoch": 0.24737,
+      "grad_norm": 1.2586092547149614,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 24737
+    },
+    {
+      "epoch": 0.24738,
+      "grad_norm": 1.1925142801258852,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 24738
+    },
+    {
+      "epoch": 0.24739,
+      "grad_norm": 1.2659780334955288,
+      "learning_rate": 0.003,
+      "loss": 4.0353,
+      "step": 24739
+    },
+    {
+      "epoch": 0.2474,
+      "grad_norm": 1.2698654079589982,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 24740
+    },
+    {
+      "epoch": 0.24741,
+      "grad_norm": 1.3240897566063115,
+      "learning_rate": 0.003,
+      "loss": 3.9924,
+      "step": 24741
+    },
+    {
+      "epoch": 0.24742,
+      "grad_norm": 1.1282311408421248,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 24742
+    },
+    {
+      "epoch": 0.24743,
+      "grad_norm": 1.5099027682193469,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 24743
+    },
+    {
+      "epoch": 0.24744,
+      "grad_norm": 1.1074621639729099,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 24744
+    },
+    {
+      "epoch": 0.24745,
+      "grad_norm": 1.6691681003128065,
+      "learning_rate": 0.003,
+      "loss": 4.0055,
+      "step": 24745
+    },
+    {
+      "epoch": 0.24746,
+      "grad_norm": 1.1079669844042808,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 24746
+    },
+    {
+      "epoch": 0.24747,
+      "grad_norm": 1.2344791410365155,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 24747
+    },
+    {
+      "epoch": 0.24748,
+      "grad_norm": 1.3447034703446612,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 24748
+    },
+    {
+      "epoch": 0.24749,
+      "grad_norm": 1.29080803954905,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 24749
+    },
+    {
+      "epoch": 0.2475,
+      "grad_norm": 1.361340045371876,
+      "learning_rate": 0.003,
+      "loss": 3.9742,
+      "step": 24750
+    },
+    {
+      "epoch": 0.24751,
+      "grad_norm": 1.2814527215281026,
+      "learning_rate": 0.003,
+      "loss": 4.0193,
+      "step": 24751
+    },
+    {
+      "epoch": 0.24752,
+      "grad_norm": 1.1921240866254013,
+      "learning_rate": 0.003,
+      "loss": 3.9834,
+      "step": 24752
+    },
+    {
+      "epoch": 0.24753,
+      "grad_norm": 1.19020121814404,
+      "learning_rate": 0.003,
+      "loss": 3.9967,
+      "step": 24753
+    },
+    {
+      "epoch": 0.24754,
+      "grad_norm": 1.4018965540454509,
+      "learning_rate": 0.003,
+      "loss": 4.0251,
+      "step": 24754
+    },
+    {
+      "epoch": 0.24755,
+      "grad_norm": 1.2736313509342867,
+      "learning_rate": 0.003,
+      "loss": 3.9562,
+      "step": 24755
+    },
+    {
+      "epoch": 0.24756,
+      "grad_norm": 1.275815412953786,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 24756
+    },
+    {
+      "epoch": 0.24757,
+      "grad_norm": 1.1781522876838788,
+      "learning_rate": 0.003,
+      "loss": 3.9767,
+      "step": 24757
+    },
+    {
+      "epoch": 0.24758,
+      "grad_norm": 1.3573637019639206,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 24758
+    },
+    {
+      "epoch": 0.24759,
+      "grad_norm": 1.128259837349403,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 24759
+    },
+    {
+      "epoch": 0.2476,
+      "grad_norm": 1.5959503675740554,
+      "learning_rate": 0.003,
+      "loss": 4.0148,
+      "step": 24760
+    },
+    {
+      "epoch": 0.24761,
+      "grad_norm": 1.1466718233716835,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 24761
+    },
+    {
+      "epoch": 0.24762,
+      "grad_norm": 1.51858803139405,
+      "learning_rate": 0.003,
+      "loss": 4.0311,
+      "step": 24762
+    },
+    {
+      "epoch": 0.24763,
+      "grad_norm": 1.163714099801333,
+      "learning_rate": 0.003,
+      "loss": 4.0227,
+      "step": 24763
+    },
+    {
+      "epoch": 0.24764,
+      "grad_norm": 1.3484213032818329,
+      "learning_rate": 0.003,
+      "loss": 4.0218,
+      "step": 24764
+    },
+    {
+      "epoch": 0.24765,
+      "grad_norm": 1.291693135609676,
+      "learning_rate": 0.003,
+      "loss": 4.0041,
+      "step": 24765
+    },
+    {
+      "epoch": 0.24766,
+      "grad_norm": 1.1403443614249857,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 24766
+    },
+    {
+      "epoch": 0.24767,
+      "grad_norm": 1.5780773108369814,
+      "learning_rate": 0.003,
+      "loss": 4.0453,
+      "step": 24767
+    },
+    {
+      "epoch": 0.24768,
+      "grad_norm": 1.065138455982252,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 24768
+    },
+    {
+      "epoch": 0.24769,
+      "grad_norm": 1.3206284779595903,
+      "learning_rate": 0.003,
+      "loss": 3.9804,
+      "step": 24769
+    },
+    {
+      "epoch": 0.2477,
+      "grad_norm": 1.3284004512689915,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 24770
+    },
+    {
+      "epoch": 0.24771,
+      "grad_norm": 1.1615201297390054,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 24771
+    },
+    {
+      "epoch": 0.24772,
+      "grad_norm": 1.3971247323894533,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 24772
+    },
+    {
+      "epoch": 0.24773,
+      "grad_norm": 1.3256733342569045,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 24773
+    },
+    {
+      "epoch": 0.24774,
+      "grad_norm": 1.1989344069739116,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 24774
+    },
+    {
+      "epoch": 0.24775,
+      "grad_norm": 1.2368180548030965,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 24775
+    },
+    {
+      "epoch": 0.24776,
+      "grad_norm": 1.3685428826936963,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 24776
+    },
+    {
+      "epoch": 0.24777,
+      "grad_norm": 1.1221595675127687,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 24777
+    },
+    {
+      "epoch": 0.24778,
+      "grad_norm": 1.4277987613885321,
+      "learning_rate": 0.003,
+      "loss": 4.0261,
+      "step": 24778
+    },
+    {
+      "epoch": 0.24779,
+      "grad_norm": 1.1956437166207747,
+      "learning_rate": 0.003,
+      "loss": 3.9862,
+      "step": 24779
+    },
+    {
+      "epoch": 0.2478,
+      "grad_norm": 1.6955267886029173,
+      "learning_rate": 0.003,
+      "loss": 4.0161,
+      "step": 24780
+    },
+    {
+      "epoch": 0.24781,
+      "grad_norm": 1.180455378502205,
+      "learning_rate": 0.003,
+      "loss": 3.9858,
+      "step": 24781
+    },
+    {
+      "epoch": 0.24782,
+      "grad_norm": 1.2144860676444047,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 24782
+    },
+    {
+      "epoch": 0.24783,
+      "grad_norm": 1.306323527570029,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 24783
+    },
+    {
+      "epoch": 0.24784,
+      "grad_norm": 1.0510745712520186,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 24784
+    },
+    {
+      "epoch": 0.24785,
+      "grad_norm": 1.2810880255990889,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 24785
+    },
+    {
+      "epoch": 0.24786,
+      "grad_norm": 1.2748510982389845,
+      "learning_rate": 0.003,
+      "loss": 4.0137,
+      "step": 24786
+    },
+    {
+      "epoch": 0.24787,
+      "grad_norm": 1.2994113774627847,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 24787
+    },
+    {
+      "epoch": 0.24788,
+      "grad_norm": 1.3985471764189301,
+      "learning_rate": 0.003,
+      "loss": 3.9893,
+      "step": 24788
+    },
+    {
+      "epoch": 0.24789,
+      "grad_norm": 1.2302664049235608,
+      "learning_rate": 0.003,
+      "loss": 4.0011,
+      "step": 24789
+    },
+    {
+      "epoch": 0.2479,
+      "grad_norm": 1.3160163375197016,
+      "learning_rate": 0.003,
+      "loss": 4.0012,
+      "step": 24790
+    },
+    {
+      "epoch": 0.24791,
+      "grad_norm": 1.1412465332686366,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 24791
+    },
+    {
+      "epoch": 0.24792,
+      "grad_norm": 1.2900846421922052,
+      "learning_rate": 0.003,
+      "loss": 3.9888,
+      "step": 24792
+    },
+    {
+      "epoch": 0.24793,
+      "grad_norm": 1.3234687080967253,
+      "learning_rate": 0.003,
+      "loss": 3.9849,
+      "step": 24793
+    },
+    {
+      "epoch": 0.24794,
+      "grad_norm": 1.13379135872044,
+      "learning_rate": 0.003,
+      "loss": 3.9964,
+      "step": 24794
+    },
+    {
+      "epoch": 0.24795,
+      "grad_norm": 1.5337351656933191,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 24795
+    },
+    {
+      "epoch": 0.24796,
+      "grad_norm": 1.1046009467491282,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 24796
+    },
+    {
+      "epoch": 0.24797,
+      "grad_norm": 1.377945094801853,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 24797
+    },
+    {
+      "epoch": 0.24798,
+      "grad_norm": 1.093946467931297,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 24798
+    },
+    {
+      "epoch": 0.24799,
+      "grad_norm": 1.5333987127755129,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 24799
+    },
+    {
+      "epoch": 0.248,
+      "grad_norm": 1.29877957555061,
+      "learning_rate": 0.003,
+      "loss": 3.9824,
+      "step": 24800
+    },
+    {
+      "epoch": 0.24801,
+      "grad_norm": 1.2347007440661206,
+      "learning_rate": 0.003,
+      "loss": 4.0282,
+      "step": 24801
+    },
+    {
+      "epoch": 0.24802,
+      "grad_norm": 1.2527769893177603,
+      "learning_rate": 0.003,
+      "loss": 3.9822,
+      "step": 24802
+    },
+    {
+      "epoch": 0.24803,
+      "grad_norm": 1.2290714697430178,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 24803
+    },
+    {
+      "epoch": 0.24804,
+      "grad_norm": 1.2538015878390643,
+      "learning_rate": 0.003,
+      "loss": 4.0229,
+      "step": 24804
+    },
+    {
+      "epoch": 0.24805,
+      "grad_norm": 1.3845519273382685,
+      "learning_rate": 0.003,
+      "loss": 3.9774,
+      "step": 24805
+    },
+    {
+      "epoch": 0.24806,
+      "grad_norm": 1.1590935312136228,
+      "learning_rate": 0.003,
+      "loss": 3.9666,
+      "step": 24806
+    },
+    {
+      "epoch": 0.24807,
+      "grad_norm": 1.2534142186996797,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 24807
+    },
+    {
+      "epoch": 0.24808,
+      "grad_norm": 1.144729776884583,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 24808
+    },
+    {
+      "epoch": 0.24809,
+      "grad_norm": 1.5284675655193565,
+      "learning_rate": 0.003,
+      "loss": 4.0087,
+      "step": 24809
+    },
+    {
+      "epoch": 0.2481,
+      "grad_norm": 0.9080608647687558,
+      "learning_rate": 0.003,
+      "loss": 3.9726,
+      "step": 24810
+    },
+    {
+      "epoch": 0.24811,
+      "grad_norm": 1.5883547032216803,
+      "learning_rate": 0.003,
+      "loss": 4.0023,
+      "step": 24811
+    },
+    {
+      "epoch": 0.24812,
+      "grad_norm": 1.0876443966108524,
+      "learning_rate": 0.003,
+      "loss": 4.0352,
+      "step": 24812
+    },
+    {
+      "epoch": 0.24813,
+      "grad_norm": 1.6143253048852126,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 24813
+    },
+    {
+      "epoch": 0.24814,
+      "grad_norm": 1.2549682761563574,
+      "learning_rate": 0.003,
+      "loss": 4.0123,
+      "step": 24814
+    },
+    {
+      "epoch": 0.24815,
+      "grad_norm": 1.519959811772563,
+      "learning_rate": 0.003,
+      "loss": 3.9865,
+      "step": 24815
+    },
+    {
+      "epoch": 0.24816,
+      "grad_norm": 1.1442723908405452,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 24816
+    },
+    {
+      "epoch": 0.24817,
+      "grad_norm": 1.3229921312263617,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 24817
+    },
+    {
+      "epoch": 0.24818,
+      "grad_norm": 1.3293788501806725,
+      "learning_rate": 0.003,
+      "loss": 4.005,
+      "step": 24818
+    },
+    {
+      "epoch": 0.24819,
+      "grad_norm": 1.1914358164235435,
+      "learning_rate": 0.003,
+      "loss": 3.9797,
+      "step": 24819
+    },
+    {
+      "epoch": 0.2482,
+      "grad_norm": 1.199770297018345,
+      "learning_rate": 0.003,
+      "loss": 4.0328,
+      "step": 24820
+    },
+    {
+      "epoch": 0.24821,
+      "grad_norm": 1.3972627621377467,
+      "learning_rate": 0.003,
+      "loss": 4.031,
+      "step": 24821
+    },
+    {
+      "epoch": 0.24822,
+      "grad_norm": 1.3454533372962105,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 24822
+    },
+    {
+      "epoch": 0.24823,
+      "grad_norm": 1.2577254830843119,
+      "learning_rate": 0.003,
+      "loss": 4.0072,
+      "step": 24823
+    },
+    {
+      "epoch": 0.24824,
+      "grad_norm": 1.1732228671551188,
+      "learning_rate": 0.003,
+      "loss": 3.9634,
+      "step": 24824
+    },
+    {
+      "epoch": 0.24825,
+      "grad_norm": 1.329687319204992,
+      "learning_rate": 0.003,
+      "loss": 4.0007,
+      "step": 24825
+    },
+    {
+      "epoch": 0.24826,
+      "grad_norm": 1.3064215684678602,
+      "learning_rate": 0.003,
+      "loss": 4.023,
+      "step": 24826
+    },
+    {
+      "epoch": 0.24827,
+      "grad_norm": 1.2189822565967858,
+      "learning_rate": 0.003,
+      "loss": 3.9861,
+      "step": 24827
+    },
+    {
+      "epoch": 0.24828,
+      "grad_norm": 1.2656661438331038,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 24828
+    },
+    {
+      "epoch": 0.24829,
+      "grad_norm": 1.3492977939328994,
+      "learning_rate": 0.003,
+      "loss": 4.0314,
+      "step": 24829
+    },
+    {
+      "epoch": 0.2483,
+      "grad_norm": 1.1917671432402603,
+      "learning_rate": 0.003,
+      "loss": 3.9919,
+      "step": 24830
+    },
+    {
+      "epoch": 0.24831,
+      "grad_norm": 1.3749512984482983,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 24831
+    },
+    {
+      "epoch": 0.24832,
+      "grad_norm": 1.2044041399148702,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 24832
+    },
+    {
+      "epoch": 0.24833,
+      "grad_norm": 1.1747425779296397,
+      "learning_rate": 0.003,
+      "loss": 3.9838,
+      "step": 24833
+    },
+    {
+      "epoch": 0.24834,
+      "grad_norm": 1.292257954460559,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 24834
+    },
+    {
+      "epoch": 0.24835,
+      "grad_norm": 1.2785405575054216,
+      "learning_rate": 0.003,
+      "loss": 4.0393,
+      "step": 24835
+    },
+    {
+      "epoch": 0.24836,
+      "grad_norm": 1.5340178107646971,
+      "learning_rate": 0.003,
+      "loss": 4.0196,
+      "step": 24836
+    },
+    {
+      "epoch": 0.24837,
+      "grad_norm": 1.2311010326875502,
+      "learning_rate": 0.003,
+      "loss": 4.0088,
+      "step": 24837
+    },
+    {
+      "epoch": 0.24838,
+      "grad_norm": 1.2727792519813133,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 24838
+    },
+    {
+      "epoch": 0.24839,
+      "grad_norm": 1.3163549304212006,
+      "learning_rate": 0.003,
+      "loss": 3.9897,
+      "step": 24839
+    },
+    {
+      "epoch": 0.2484,
+      "grad_norm": 1.3357520603663466,
+      "learning_rate": 0.003,
+      "loss": 4.0278,
+      "step": 24840
+    },
+    {
+      "epoch": 0.24841,
+      "grad_norm": 1.2218485347932249,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 24841
+    },
+    {
+      "epoch": 0.24842,
+      "grad_norm": 1.2279095558775726,
+      "learning_rate": 0.003,
+      "loss": 3.9823,
+      "step": 24842
+    },
+    {
+      "epoch": 0.24843,
+      "grad_norm": 1.4007336722056156,
+      "learning_rate": 0.003,
+      "loss": 3.9955,
+      "step": 24843
+    },
+    {
+      "epoch": 0.24844,
+      "grad_norm": 1.1662288841338637,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 24844
+    },
+    {
+      "epoch": 0.24845,
+      "grad_norm": 1.4257690719846612,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 24845
+    },
+    {
+      "epoch": 0.24846,
+      "grad_norm": 1.118697449290231,
+      "learning_rate": 0.003,
+      "loss": 3.9926,
+      "step": 24846
+    },
+    {
+      "epoch": 0.24847,
+      "grad_norm": 1.3705895464949163,
+      "learning_rate": 0.003,
+      "loss": 3.9871,
+      "step": 24847
+    },
+    {
+      "epoch": 0.24848,
+      "grad_norm": 1.4590067737964645,
+      "learning_rate": 0.003,
+      "loss": 4.0026,
+      "step": 24848
+    },
+    {
+      "epoch": 0.24849,
+      "grad_norm": 1.1937848953540344,
+      "learning_rate": 0.003,
+      "loss": 4.0191,
+      "step": 24849
+    },
+    {
+      "epoch": 0.2485,
+      "grad_norm": 1.3337511966131725,
+      "learning_rate": 0.003,
+      "loss": 3.9942,
+      "step": 24850
+    },
+    {
+      "epoch": 0.24851,
+      "grad_norm": 1.187546825609993,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 24851
+    },
+    {
+      "epoch": 0.24852,
+      "grad_norm": 1.3090821941584518,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 24852
+    },
+    {
+      "epoch": 0.24853,
+      "grad_norm": 1.2488246438520128,
+      "learning_rate": 0.003,
+      "loss": 3.9974,
+      "step": 24853
+    },
+    {
+      "epoch": 0.24854,
+      "grad_norm": 1.3449794365577001,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 24854
+    },
+    {
+      "epoch": 0.24855,
+      "grad_norm": 1.2434427002856288,
+      "learning_rate": 0.003,
+      "loss": 3.9832,
+      "step": 24855
+    },
+    {
+      "epoch": 0.24856,
+      "grad_norm": 1.4036217570259015,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 24856
+    },
+    {
+      "epoch": 0.24857,
+      "grad_norm": 1.2936926977895182,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 24857
+    },
+    {
+      "epoch": 0.24858,
+      "grad_norm": 1.287537924964048,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 24858
+    },
+    {
+      "epoch": 0.24859,
+      "grad_norm": 1.161864338666919,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 24859
+    },
+    {
+      "epoch": 0.2486,
+      "grad_norm": 1.257853631444904,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 24860
+    },
+    {
+      "epoch": 0.24861,
+      "grad_norm": 1.4348334166425858,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 24861
+    },
+    {
+      "epoch": 0.24862,
+      "grad_norm": 1.0392422049793353,
+      "learning_rate": 0.003,
+      "loss": 3.986,
+      "step": 24862
+    },
+    {
+      "epoch": 0.24863,
+      "grad_norm": 1.455021555540592,
+      "learning_rate": 0.003,
+      "loss": 4.0076,
+      "step": 24863
+    },
+    {
+      "epoch": 0.24864,
+      "grad_norm": 1.218026394904179,
+      "learning_rate": 0.003,
+      "loss": 3.9863,
+      "step": 24864
+    },
+    {
+      "epoch": 0.24865,
+      "grad_norm": 1.5974151090152766,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 24865
+    },
+    {
+      "epoch": 0.24866,
+      "grad_norm": 1.1767481559176702,
+      "learning_rate": 0.003,
+      "loss": 3.9954,
+      "step": 24866
+    },
+    {
+      "epoch": 0.24867,
+      "grad_norm": 1.411737204720549,
+      "learning_rate": 0.003,
+      "loss": 3.9912,
+      "step": 24867
+    },
+    {
+      "epoch": 0.24868,
+      "grad_norm": 1.3491470461922128,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 24868
+    },
+    {
+      "epoch": 0.24869,
+      "grad_norm": 1.2291025752824505,
+      "learning_rate": 0.003,
+      "loss": 3.9783,
+      "step": 24869
+    },
+    {
+      "epoch": 0.2487,
+      "grad_norm": 1.3673394264229057,
+      "learning_rate": 0.003,
+      "loss": 4.0272,
+      "step": 24870
+    },
+    {
+      "epoch": 0.24871,
+      "grad_norm": 1.1903804168971623,
+      "learning_rate": 0.003,
+      "loss": 4.0104,
+      "step": 24871
+    },
+    {
+      "epoch": 0.24872,
+      "grad_norm": 1.6231068917844211,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 24872
+    },
+    {
+      "epoch": 0.24873,
+      "grad_norm": 1.0812242786293513,
+      "learning_rate": 0.003,
+      "loss": 3.987,
+      "step": 24873
+    },
+    {
+      "epoch": 0.24874,
+      "grad_norm": 1.6293445693675597,
+      "learning_rate": 0.003,
+      "loss": 4.0231,
+      "step": 24874
+    },
+    {
+      "epoch": 0.24875,
+      "grad_norm": 1.3188637493780766,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 24875
+    },
+    {
+      "epoch": 0.24876,
+      "grad_norm": 1.2312440385498507,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 24876
+    },
+    {
+      "epoch": 0.24877,
+      "grad_norm": 1.3513017198510853,
+      "learning_rate": 0.003,
+      "loss": 4.0404,
+      "step": 24877
+    },
+    {
+      "epoch": 0.24878,
+      "grad_norm": 1.2550042552259233,
+      "learning_rate": 0.003,
+      "loss": 3.9832,
+      "step": 24878
+    },
+    {
+      "epoch": 0.24879,
+      "grad_norm": 1.422785423946869,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 24879
+    },
+    {
+      "epoch": 0.2488,
+      "grad_norm": 1.2898869860657982,
+      "learning_rate": 0.003,
+      "loss": 3.9946,
+      "step": 24880
+    },
+    {
+      "epoch": 0.24881,
+      "grad_norm": 1.2775380611482845,
+      "learning_rate": 0.003,
+      "loss": 3.9998,
+      "step": 24881
+    },
+    {
+      "epoch": 0.24882,
+      "grad_norm": 1.0304346570132774,
+      "learning_rate": 0.003,
+      "loss": 3.9792,
+      "step": 24882
+    },
+    {
+      "epoch": 0.24883,
+      "grad_norm": 1.5403124407238837,
+      "learning_rate": 0.003,
+      "loss": 4.0048,
+      "step": 24883
+    },
+    {
+      "epoch": 0.24884,
+      "grad_norm": 1.0330056234084284,
+      "learning_rate": 0.003,
+      "loss": 4.0243,
+      "step": 24884
+    },
+    {
+      "epoch": 0.24885,
+      "grad_norm": 1.501084973984122,
+      "learning_rate": 0.003,
+      "loss": 4.0319,
+      "step": 24885
+    },
+    {
+      "epoch": 0.24886,
+      "grad_norm": 1.2210285273791712,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 24886
+    },
+    {
+      "epoch": 0.24887,
+      "grad_norm": 1.1492737150356547,
+      "learning_rate": 0.003,
+      "loss": 3.9873,
+      "step": 24887
+    },
+    {
+      "epoch": 0.24888,
+      "grad_norm": 1.3113781034601535,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 24888
+    },
+    {
+      "epoch": 0.24889,
+      "grad_norm": 1.1488469717652494,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 24889
+    },
+    {
+      "epoch": 0.2489,
+      "grad_norm": 1.3010993277167773,
+      "learning_rate": 0.003,
+      "loss": 3.9887,
+      "step": 24890
+    },
+    {
+      "epoch": 0.24891,
+      "grad_norm": 1.2338534311267324,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 24891
+    },
+    {
+      "epoch": 0.24892,
+      "grad_norm": 1.1728781353375652,
+      "learning_rate": 0.003,
+      "loss": 4.0427,
+      "step": 24892
+    },
+    {
+      "epoch": 0.24893,
+      "grad_norm": 1.290165402482736,
+      "learning_rate": 0.003,
+      "loss": 4.0031,
+      "step": 24893
+    },
+    {
+      "epoch": 0.24894,
+      "grad_norm": 1.2617216220671112,
+      "learning_rate": 0.003,
+      "loss": 3.9857,
+      "step": 24894
+    },
+    {
+      "epoch": 0.24895,
+      "grad_norm": 1.252379900982826,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 24895
+    },
+    {
+      "epoch": 0.24896,
+      "grad_norm": 1.374322468132103,
+      "learning_rate": 0.003,
+      "loss": 4.0165,
+      "step": 24896
+    },
+    {
+      "epoch": 0.24897,
+      "grad_norm": 1.270221986956731,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 24897
+    },
+    {
+      "epoch": 0.24898,
+      "grad_norm": 1.1156538217295926,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 24898
+    },
+    {
+      "epoch": 0.24899,
+      "grad_norm": 1.2483250926704648,
+      "learning_rate": 0.003,
+      "loss": 3.9866,
+      "step": 24899
+    },
+    {
+      "epoch": 0.249,
+      "grad_norm": 1.224262851393785,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 24900
+    },
+    {
+      "epoch": 0.24901,
+      "grad_norm": 1.478586386138734,
+      "learning_rate": 0.003,
+      "loss": 3.9788,
+      "step": 24901
+    },
+    {
+      "epoch": 0.24902,
+      "grad_norm": 1.368576399787182,
+      "learning_rate": 0.003,
+      "loss": 3.9975,
+      "step": 24902
+    },
+    {
+      "epoch": 0.24903,
+      "grad_norm": 1.3266853863712427,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 24903
+    },
+    {
+      "epoch": 0.24904,
+      "grad_norm": 1.1217833617925252,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 24904
+    },
+    {
+      "epoch": 0.24905,
+      "grad_norm": 1.5040731263492348,
+      "learning_rate": 0.003,
+      "loss": 4.0296,
+      "step": 24905
+    },
+    {
+      "epoch": 0.24906,
+      "grad_norm": 1.0185206343173159,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 24906
+    },
+    {
+      "epoch": 0.24907,
+      "grad_norm": 1.360055064167583,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 24907
+    },
+    {
+      "epoch": 0.24908,
+      "grad_norm": 0.9433723798840654,
+      "learning_rate": 0.003,
+      "loss": 4.0194,
+      "step": 24908
+    },
+    {
+      "epoch": 0.24909,
+      "grad_norm": 1.2796239947993924,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 24909
+    },
+    {
+      "epoch": 0.2491,
+      "grad_norm": 1.1591845890394785,
+      "learning_rate": 0.003,
+      "loss": 3.994,
+      "step": 24910
+    },
+    {
+      "epoch": 0.24911,
+      "grad_norm": 1.3867317709052105,
+      "learning_rate": 0.003,
+      "loss": 4.0242,
+      "step": 24911
+    },
+    {
+      "epoch": 0.24912,
+      "grad_norm": 1.2216057959852271,
+      "learning_rate": 0.003,
+      "loss": 4.0154,
+      "step": 24912
+    },
+    {
+      "epoch": 0.24913,
+      "grad_norm": 1.3955561167164259,
+      "learning_rate": 0.003,
+      "loss": 4.014,
+      "step": 24913
+    },
+    {
+      "epoch": 0.24914,
+      "grad_norm": 1.2284279014063995,
+      "learning_rate": 0.003,
+      "loss": 4.019,
+      "step": 24914
+    },
+    {
+      "epoch": 0.24915,
+      "grad_norm": 1.3363288766643309,
+      "learning_rate": 0.003,
+      "loss": 4.0371,
+      "step": 24915
+    },
+    {
+      "epoch": 0.24916,
+      "grad_norm": 1.1950620328613943,
+      "learning_rate": 0.003,
+      "loss": 4.0156,
+      "step": 24916
+    },
+    {
+      "epoch": 0.24917,
+      "grad_norm": 1.0744839013891923,
+      "learning_rate": 0.003,
+      "loss": 4.0029,
+      "step": 24917
+    },
+    {
+      "epoch": 0.24918,
+      "grad_norm": 1.5547132638080052,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 24918
+    },
+    {
+      "epoch": 0.24919,
+      "grad_norm": 1.128390721563401,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 24919
+    },
+    {
+      "epoch": 0.2492,
+      "grad_norm": 1.476508858884181,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 24920
+    },
+    {
+      "epoch": 0.24921,
+      "grad_norm": 1.0482012963221061,
+      "learning_rate": 0.003,
+      "loss": 4.0259,
+      "step": 24921
+    },
+    {
+      "epoch": 0.24922,
+      "grad_norm": 1.5533303480369969,
+      "learning_rate": 0.003,
+      "loss": 4.0107,
+      "step": 24922
+    },
+    {
+      "epoch": 0.24923,
+      "grad_norm": 1.4538794021283639,
+      "learning_rate": 0.003,
+      "loss": 4.0183,
+      "step": 24923
+    },
+    {
+      "epoch": 0.24924,
+      "grad_norm": 1.3695839094048596,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 24924
+    },
+    {
+      "epoch": 0.24925,
+      "grad_norm": 1.3139881150119592,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 24925
+    },
+    {
+      "epoch": 0.24926,
+      "grad_norm": 1.1931398329789007,
+      "learning_rate": 0.003,
+      "loss": 4.0003,
+      "step": 24926
+    },
+    {
+      "epoch": 0.24927,
+      "grad_norm": 1.1819693387037813,
+      "learning_rate": 0.003,
+      "loss": 4.0017,
+      "step": 24927
+    },
+    {
+      "epoch": 0.24928,
+      "grad_norm": 1.3916955225888425,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 24928
+    },
+    {
+      "epoch": 0.24929,
+      "grad_norm": 1.0355718905472542,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 24929
+    },
+    {
+      "epoch": 0.2493,
+      "grad_norm": 1.3288166444793241,
+      "learning_rate": 0.003,
+      "loss": 3.9776,
+      "step": 24930
+    },
+    {
+      "epoch": 0.24931,
+      "grad_norm": 1.1347798408137386,
+      "learning_rate": 0.003,
+      "loss": 4.0268,
+      "step": 24931
+    },
+    {
+      "epoch": 0.24932,
+      "grad_norm": 1.421258327551958,
+      "learning_rate": 0.003,
+      "loss": 4.0032,
+      "step": 24932
+    },
+    {
+      "epoch": 0.24933,
+      "grad_norm": 1.1115660333179909,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 24933
+    },
+    {
+      "epoch": 0.24934,
+      "grad_norm": 1.522559024691964,
+      "learning_rate": 0.003,
+      "loss": 4.0197,
+      "step": 24934
+    },
+    {
+      "epoch": 0.24935,
+      "grad_norm": 1.090350190871216,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 24935
+    },
+    {
+      "epoch": 0.24936,
+      "grad_norm": 1.468746953267041,
+      "learning_rate": 0.003,
+      "loss": 4.0053,
+      "step": 24936
+    },
+    {
+      "epoch": 0.24937,
+      "grad_norm": 1.3136509783242598,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 24937
+    },
+    {
+      "epoch": 0.24938,
+      "grad_norm": 1.4061972055045102,
+      "learning_rate": 0.003,
+      "loss": 4.0124,
+      "step": 24938
+    },
+    {
+      "epoch": 0.24939,
+      "grad_norm": 1.1537516164255506,
+      "learning_rate": 0.003,
+      "loss": 4.0091,
+      "step": 24939
+    },
+    {
+      "epoch": 0.2494,
+      "grad_norm": 1.446065296390733,
+      "learning_rate": 0.003,
+      "loss": 4.0114,
+      "step": 24940
+    },
+    {
+      "epoch": 0.24941,
+      "grad_norm": 1.183106611479882,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 24941
+    },
+    {
+      "epoch": 0.24942,
+      "grad_norm": 1.2268121251133557,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 24942
+    },
+    {
+      "epoch": 0.24943,
+      "grad_norm": 1.0807861802360297,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 24943
+    },
+    {
+      "epoch": 0.24944,
+      "grad_norm": 1.6299058204574701,
+      "learning_rate": 0.003,
+      "loss": 3.9898,
+      "step": 24944
+    },
+    {
+      "epoch": 0.24945,
+      "grad_norm": 1.144800465697433,
+      "learning_rate": 0.003,
+      "loss": 3.9794,
+      "step": 24945
+    },
+    {
+      "epoch": 0.24946,
+      "grad_norm": 1.4730186371994483,
+      "learning_rate": 0.003,
+      "loss": 4.0064,
+      "step": 24946
+    },
+    {
+      "epoch": 0.24947,
+      "grad_norm": 1.2595786307961154,
+      "learning_rate": 0.003,
+      "loss": 4.0025,
+      "step": 24947
+    },
+    {
+      "epoch": 0.24948,
+      "grad_norm": 1.420030123986208,
+      "learning_rate": 0.003,
+      "loss": 4.0312,
+      "step": 24948
+    },
+    {
+      "epoch": 0.24949,
+      "grad_norm": 1.1821453181049493,
+      "learning_rate": 0.003,
+      "loss": 3.9822,
+      "step": 24949
+    },
+    {
+      "epoch": 0.2495,
+      "grad_norm": 1.3442993067862419,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 24950
+    },
+    {
+      "epoch": 0.24951,
+      "grad_norm": 1.156361813184033,
+      "learning_rate": 0.003,
+      "loss": 4.0084,
+      "step": 24951
+    },
+    {
+      "epoch": 0.24952,
+      "grad_norm": 1.1748116656927985,
+      "learning_rate": 0.003,
+      "loss": 3.9797,
+      "step": 24952
+    },
+    {
+      "epoch": 0.24953,
+      "grad_norm": 1.259643366729641,
+      "learning_rate": 0.003,
+      "loss": 3.9839,
+      "step": 24953
+    },
+    {
+      "epoch": 0.24954,
+      "grad_norm": 1.308741374061298,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 24954
+    },
+    {
+      "epoch": 0.24955,
+      "grad_norm": 1.1466401956111962,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 24955
+    },
+    {
+      "epoch": 0.24956,
+      "grad_norm": 1.4626940193379336,
+      "learning_rate": 0.003,
+      "loss": 4.0152,
+      "step": 24956
+    },
+    {
+      "epoch": 0.24957,
+      "grad_norm": 1.2646721109707957,
+      "learning_rate": 0.003,
+      "loss": 4.0255,
+      "step": 24957
+    },
+    {
+      "epoch": 0.24958,
+      "grad_norm": 1.1599052528386549,
+      "learning_rate": 0.003,
+      "loss": 4.017,
+      "step": 24958
+    },
+    {
+      "epoch": 0.24959,
+      "grad_norm": 1.3651897754703737,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 24959
+    },
+    {
+      "epoch": 0.2496,
+      "grad_norm": 1.2369685441056684,
+      "learning_rate": 0.003,
+      "loss": 4.0049,
+      "step": 24960
+    },
+    {
+      "epoch": 0.24961,
+      "grad_norm": 1.3813918443348578,
+      "learning_rate": 0.003,
+      "loss": 3.9778,
+      "step": 24961
+    },
+    {
+      "epoch": 0.24962,
+      "grad_norm": 1.0030628330559601,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 24962
+    },
+    {
+      "epoch": 0.24963,
+      "grad_norm": 1.3014232896652491,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 24963
+    },
+    {
+      "epoch": 0.24964,
+      "grad_norm": 1.1956331930084867,
+      "learning_rate": 0.003,
+      "loss": 4.0217,
+      "step": 24964
+    },
+    {
+      "epoch": 0.24965,
+      "grad_norm": 1.356563197835384,
+      "learning_rate": 0.003,
+      "loss": 4.012,
+      "step": 24965
+    },
+    {
+      "epoch": 0.24966,
+      "grad_norm": 0.9755089726097715,
+      "learning_rate": 0.003,
+      "loss": 4.0139,
+      "step": 24966
+    },
+    {
+      "epoch": 0.24967,
+      "grad_norm": 1.3562426891920392,
+      "learning_rate": 0.003,
+      "loss": 4.0274,
+      "step": 24967
+    },
+    {
+      "epoch": 0.24968,
+      "grad_norm": 1.1466238053903122,
+      "learning_rate": 0.003,
+      "loss": 3.988,
+      "step": 24968
+    },
+    {
+      "epoch": 0.24969,
+      "grad_norm": 1.4219946951404148,
+      "learning_rate": 0.003,
+      "loss": 4.0021,
+      "step": 24969
+    },
+    {
+      "epoch": 0.2497,
+      "grad_norm": 1.3542650404664287,
+      "learning_rate": 0.003,
+      "loss": 3.9818,
+      "step": 24970
+    },
+    {
+      "epoch": 0.24971,
+      "grad_norm": 1.297008110025553,
+      "learning_rate": 0.003,
+      "loss": 4.0151,
+      "step": 24971
+    },
+    {
+      "epoch": 0.24972,
+      "grad_norm": 1.2922783175227976,
+      "learning_rate": 0.003,
+      "loss": 4.0216,
+      "step": 24972
+    },
+    {
+      "epoch": 0.24973,
+      "grad_norm": 1.196093398319463,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 24973
+    },
+    {
+      "epoch": 0.24974,
+      "grad_norm": 1.2255564615847019,
+      "learning_rate": 0.003,
+      "loss": 4.0157,
+      "step": 24974
+    },
+    {
+      "epoch": 0.24975,
+      "grad_norm": 1.2893941186461642,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 24975
+    },
+    {
+      "epoch": 0.24976,
+      "grad_norm": 1.3047554085986857,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 24976
+    },
+    {
+      "epoch": 0.24977,
+      "grad_norm": 1.3328456934666635,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 24977
+    },
+    {
+      "epoch": 0.24978,
+      "grad_norm": 1.4299312005817246,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 24978
+    },
+    {
+      "epoch": 0.24979,
+      "grad_norm": 1.2446861552126227,
+      "learning_rate": 0.003,
+      "loss": 4.0301,
+      "step": 24979
+    },
+    {
+      "epoch": 0.2498,
+      "grad_norm": 1.3209754383417758,
+      "learning_rate": 0.003,
+      "loss": 3.9737,
+      "step": 24980
+    },
+    {
+      "epoch": 0.24981,
+      "grad_norm": 1.165391154919898,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 24981
+    },
+    {
+      "epoch": 0.24982,
+      "grad_norm": 1.411780290857666,
+      "learning_rate": 0.003,
+      "loss": 3.9956,
+      "step": 24982
+    },
+    {
+      "epoch": 0.24983,
+      "grad_norm": 1.1164819095400895,
+      "learning_rate": 0.003,
+      "loss": 3.977,
+      "step": 24983
+    },
+    {
+      "epoch": 0.24984,
+      "grad_norm": 1.4304366564880564,
+      "learning_rate": 0.003,
+      "loss": 3.9521,
+      "step": 24984
+    },
+    {
+      "epoch": 0.24985,
+      "grad_norm": 1.2177555672595468,
+      "learning_rate": 0.003,
+      "loss": 4.0195,
+      "step": 24985
+    },
+    {
+      "epoch": 0.24986,
+      "grad_norm": 1.3659115081361797,
+      "learning_rate": 0.003,
+      "loss": 3.9957,
+      "step": 24986
+    },
+    {
+      "epoch": 0.24987,
+      "grad_norm": 1.5989730475575408,
+      "learning_rate": 0.003,
+      "loss": 4.0117,
+      "step": 24987
+    },
+    {
+      "epoch": 0.24988,
+      "grad_norm": 1.1118646182094676,
+      "learning_rate": 0.003,
+      "loss": 4.0189,
+      "step": 24988
+    },
+    {
+      "epoch": 0.24989,
+      "grad_norm": 1.3840877902302413,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 24989
+    },
+    {
+      "epoch": 0.2499,
+      "grad_norm": 1.428853365160207,
+      "learning_rate": 0.003,
+      "loss": 3.9944,
+      "step": 24990
+    },
+    {
+      "epoch": 0.24991,
+      "grad_norm": 0.8227728008148509,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 24991
+    },
+    {
+      "epoch": 0.24992,
+      "grad_norm": 1.0930502279450027,
+      "learning_rate": 0.003,
+      "loss": 4.0256,
+      "step": 24992
+    },
+    {
+      "epoch": 0.24993,
+      "grad_norm": 1.4653652598884657,
+      "learning_rate": 0.003,
+      "loss": 4.0099,
+      "step": 24993
+    },
+    {
+      "epoch": 0.24994,
+      "grad_norm": 1.313163637321667,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 24994
+    },
+    {
+      "epoch": 0.24995,
+      "grad_norm": 1.4100179493395204,
+      "learning_rate": 0.003,
+      "loss": 4.0199,
+      "step": 24995
+    },
+    {
+      "epoch": 0.24996,
+      "grad_norm": 1.0736960633565933,
+      "learning_rate": 0.003,
+      "loss": 4.0022,
+      "step": 24996
+    },
+    {
+      "epoch": 0.24997,
+      "grad_norm": 1.428969266481232,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 24997
+    },
+    {
+      "epoch": 0.24998,
+      "grad_norm": 1.0164523449679026,
+      "learning_rate": 0.003,
+      "loss": 4.0159,
+      "step": 24998
+    },
+    {
+      "epoch": 0.24999,
+      "grad_norm": 1.3151121452946746,
+      "learning_rate": 0.003,
+      "loss": 4.0249,
+      "step": 24999
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 1.254323579563658,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 25000
+    },
+    {
+      "epoch": 0.25001,
+      "grad_norm": 1.1726761094085252,
+      "learning_rate": 0.003,
+      "loss": 3.9737,
+      "step": 25001
+    },
+    {
+      "epoch": 0.25002,
+      "grad_norm": 1.4045875169063473,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 25002
+    },
+    {
+      "epoch": 0.25003,
+      "grad_norm": 1.0666532407443317,
+      "learning_rate": 0.003,
+      "loss": 4.0008,
+      "step": 25003
+    },
+    {
+      "epoch": 0.25004,
+      "grad_norm": 1.4171042991201845,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 25004
+    },
+    {
+      "epoch": 0.25005,
+      "grad_norm": 1.2419184185156935,
+      "learning_rate": 0.003,
+      "loss": 3.9894,
+      "step": 25005
+    },
+    {
+      "epoch": 0.25006,
+      "grad_norm": 1.4458404209168565,
+      "learning_rate": 0.003,
+      "loss": 3.9988,
+      "step": 25006
+    },
+    {
+      "epoch": 0.25007,
+      "grad_norm": 1.4474515097419627,
+      "learning_rate": 0.003,
+      "loss": 3.9724,
+      "step": 25007
+    },
+    {
+      "epoch": 0.25008,
+      "grad_norm": 1.344288401360348,
+      "learning_rate": 0.003,
+      "loss": 4.0269,
+      "step": 25008
+    },
+    {
+      "epoch": 0.25009,
+      "grad_norm": 1.2552024724561957,
+      "learning_rate": 0.003,
+      "loss": 4.0035,
+      "step": 25009
+    },
+    {
+      "epoch": 0.2501,
+      "grad_norm": 1.3244401056196873,
+      "learning_rate": 0.003,
+      "loss": 3.9904,
+      "step": 25010
+    },
+    {
+      "epoch": 0.25011,
+      "grad_norm": 1.2906485025793337,
+      "learning_rate": 0.003,
+      "loss": 4.0149,
+      "step": 25011
+    },
+    {
+      "epoch": 0.25012,
+      "grad_norm": 1.385164181519221,
+      "learning_rate": 0.003,
+      "loss": 3.9978,
+      "step": 25012
+    },
+    {
+      "epoch": 0.25013,
+      "grad_norm": 1.2667395896622444,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 25013
+    },
+    {
+      "epoch": 0.25014,
+      "grad_norm": 1.1685461260514172,
+      "learning_rate": 0.003,
+      "loss": 3.9724,
+      "step": 25014
+    },
+    {
+      "epoch": 0.25015,
+      "grad_norm": 1.2382602601767199,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 25015
+    },
+    {
+      "epoch": 0.25016,
+      "grad_norm": 1.3898484666780762,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 25016
+    },
+    {
+      "epoch": 0.25017,
+      "grad_norm": 1.1950526280595362,
+      "learning_rate": 0.003,
+      "loss": 4.0201,
+      "step": 25017
+    },
+    {
+      "epoch": 0.25018,
+      "grad_norm": 1.3439930092116594,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 25018
+    },
+    {
+      "epoch": 0.25019,
+      "grad_norm": 1.1091918665162197,
+      "learning_rate": 0.003,
+      "loss": 4.027,
+      "step": 25019
+    },
+    {
+      "epoch": 0.2502,
+      "grad_norm": 1.5459808958322396,
+      "learning_rate": 0.003,
+      "loss": 4.0212,
+      "step": 25020
+    },
+    {
+      "epoch": 0.25021,
+      "grad_norm": 1.0995884174168025,
+      "learning_rate": 0.003,
+      "loss": 4.0142,
+      "step": 25021
+    },
+    {
+      "epoch": 0.25022,
+      "grad_norm": 1.4436823119519229,
+      "learning_rate": 0.003,
+      "loss": 4.0135,
+      "step": 25022
+    },
+    {
+      "epoch": 0.25023,
+      "grad_norm": 1.1896949849217537,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 25023
+    },
+    {
+      "epoch": 0.25024,
+      "grad_norm": 1.5035352707211171,
+      "learning_rate": 0.003,
+      "loss": 3.9881,
+      "step": 25024
+    },
+    {
+      "epoch": 0.25025,
+      "grad_norm": 1.2775174854752636,
+      "learning_rate": 0.003,
+      "loss": 4.0224,
+      "step": 25025
+    },
+    {
+      "epoch": 0.25026,
+      "grad_norm": 1.2478477156914143,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 25026
+    },
+    {
+      "epoch": 0.25027,
+      "grad_norm": 1.1798637218533474,
+      "learning_rate": 0.003,
+      "loss": 4.0092,
+      "step": 25027
+    },
+    {
+      "epoch": 0.25028,
+      "grad_norm": 1.3102537882474037,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 25028
+    },
+    {
+      "epoch": 0.25029,
+      "grad_norm": 1.5984478095604737,
+      "learning_rate": 0.003,
+      "loss": 3.989,
+      "step": 25029
+    },
+    {
+      "epoch": 0.2503,
+      "grad_norm": 1.2416904504467885,
+      "learning_rate": 0.003,
+      "loss": 3.9941,
+      "step": 25030
+    },
+    {
+      "epoch": 0.25031,
+      "grad_norm": 1.4360778462055885,
+      "learning_rate": 0.003,
+      "loss": 4.0167,
+      "step": 25031
+    },
+    {
+      "epoch": 0.25032,
+      "grad_norm": 1.2692011377644292,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 25032
+    },
+    {
+      "epoch": 0.25033,
+      "grad_norm": 1.1445657833053884,
+      "learning_rate": 0.003,
+      "loss": 4.0146,
+      "step": 25033
+    },
+    {
+      "epoch": 0.25034,
+      "grad_norm": 1.359884218420161,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 25034
+    },
+    {
+      "epoch": 0.25035,
+      "grad_norm": 1.0808704740704813,
+      "learning_rate": 0.003,
+      "loss": 3.9831,
+      "step": 25035
+    },
+    {
+      "epoch": 0.25036,
+      "grad_norm": 1.4554990343660752,
+      "learning_rate": 0.003,
+      "loss": 4.0067,
+      "step": 25036
+    },
+    {
+      "epoch": 0.25037,
+      "grad_norm": 1.2081846904534108,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 25037
+    },
+    {
+      "epoch": 0.25038,
+      "grad_norm": 1.420385646145912,
+      "learning_rate": 0.003,
+      "loss": 4.011,
+      "step": 25038
+    },
+    {
+      "epoch": 0.25039,
+      "grad_norm": 1.0284145500044675,
+      "learning_rate": 0.003,
+      "loss": 3.9684,
+      "step": 25039
+    },
+    {
+      "epoch": 0.2504,
+      "grad_norm": 1.7626288397524545,
+      "learning_rate": 0.003,
+      "loss": 4.0184,
+      "step": 25040
+    },
+    {
+      "epoch": 0.25041,
+      "grad_norm": 1.0476374341182602,
+      "learning_rate": 0.003,
+      "loss": 4.0185,
+      "step": 25041
+    },
+    {
+      "epoch": 0.25042,
+      "grad_norm": 1.2344905484621174,
+      "learning_rate": 0.003,
+      "loss": 4.0013,
+      "step": 25042
+    },
+    {
+      "epoch": 0.25043,
+      "grad_norm": 1.287590250882196,
+      "learning_rate": 0.003,
+      "loss": 4.0036,
+      "step": 25043
+    },
+    {
+      "epoch": 0.25044,
+      "grad_norm": 1.251201406378147,
+      "learning_rate": 0.003,
+      "loss": 4.0225,
+      "step": 25044
+    },
+    {
+      "epoch": 0.25045,
+      "grad_norm": 1.1603752977637478,
+      "learning_rate": 0.003,
+      "loss": 3.9837,
+      "step": 25045
+    },
+    {
+      "epoch": 0.25046,
+      "grad_norm": 1.5818933233120087,
+      "learning_rate": 0.003,
+      "loss": 3.9665,
+      "step": 25046
+    },
+    {
+      "epoch": 0.25047,
+      "grad_norm": 1.0123998535568026,
+      "learning_rate": 0.003,
+      "loss": 4.0131,
+      "step": 25047
+    },
+    {
+      "epoch": 0.25048,
+      "grad_norm": 1.5230259799784935,
+      "learning_rate": 0.003,
+      "loss": 3.9665,
+      "step": 25048
+    },
+    {
+      "epoch": 0.25049,
+      "grad_norm": 1.1002495591504406,
+      "learning_rate": 0.003,
+      "loss": 4.0176,
+      "step": 25049
+    },
+    {
+      "epoch": 0.2505,
+      "grad_norm": 1.5632387185638208,
+      "learning_rate": 0.003,
+      "loss": 4.0283,
+      "step": 25050
+    },
+    {
+      "epoch": 0.25051,
+      "grad_norm": 1.1677807188752878,
+      "learning_rate": 0.003,
+      "loss": 3.9972,
+      "step": 25051
+    },
+    {
+      "epoch": 0.25052,
+      "grad_norm": 1.3558667064894316,
+      "learning_rate": 0.003,
+      "loss": 3.9775,
+      "step": 25052
+    },
+    {
+      "epoch": 0.25053,
+      "grad_norm": 1.1108157977919293,
+      "learning_rate": 0.003,
+      "loss": 4.0145,
+      "step": 25053
+    },
+    {
+      "epoch": 0.25054,
+      "grad_norm": 1.4295244642500986,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 25054
+    },
+    {
+      "epoch": 0.25055,
+      "grad_norm": 1.209986825566858,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 25055
+    },
+    {
+      "epoch": 0.25056,
+      "grad_norm": 1.4415529503890356,
+      "learning_rate": 0.003,
+      "loss": 4.0125,
+      "step": 25056
+    },
+    {
+      "epoch": 0.25057,
+      "grad_norm": 1.1665958795916318,
+      "learning_rate": 0.003,
+      "loss": 4.0178,
+      "step": 25057
+    },
+    {
+      "epoch": 0.25058,
+      "grad_norm": 1.2801314041438154,
+      "learning_rate": 0.003,
+      "loss": 4.0065,
+      "step": 25058
+    },
+    {
+      "epoch": 0.25059,
+      "grad_norm": 1.2596978877890534,
+      "learning_rate": 0.003,
+      "loss": 3.9716,
+      "step": 25059
+    },
+    {
+      "epoch": 0.2506,
+      "grad_norm": 1.4610789188335271,
+      "learning_rate": 0.003,
+      "loss": 3.9958,
+      "step": 25060
+    },
+    {
+      "epoch": 0.25061,
+      "grad_norm": 1.2214315699253864,
+      "learning_rate": 0.003,
+      "loss": 3.9891,
+      "step": 25061
+    },
+    {
+      "epoch": 0.25062,
+      "grad_norm": 1.180662794849378,
+      "learning_rate": 0.003,
+      "loss": 4.0073,
+      "step": 25062
+    },
+    {
+      "epoch": 0.25063,
+      "grad_norm": 1.2172355133895043,
+      "learning_rate": 0.003,
+      "loss": 4.0043,
+      "step": 25063
+    },
+    {
+      "epoch": 0.25064,
+      "grad_norm": 1.2915031233972083,
+      "learning_rate": 0.003,
+      "loss": 4.0141,
+      "step": 25064
+    },
+    {
+      "epoch": 0.25065,
+      "grad_norm": 1.2633163673204781,
+      "learning_rate": 0.003,
+      "loss": 4.0062,
+      "step": 25065
+    },
+    {
+      "epoch": 0.25066,
+      "grad_norm": 1.2842987092356644,
+      "learning_rate": 0.003,
+      "loss": 4.0045,
+      "step": 25066
+    },
+    {
+      "epoch": 0.25067,
+      "grad_norm": 1.2449155356154518,
+      "learning_rate": 0.003,
+      "loss": 4.0182,
+      "step": 25067
+    },
+    {
+      "epoch": 0.25068,
+      "grad_norm": 1.1319140547539226,
+      "learning_rate": 0.003,
+      "loss": 3.9598,
+      "step": 25068
+    },
+    {
+      "epoch": 0.25069,
+      "grad_norm": 1.3042409553557093,
+      "learning_rate": 0.003,
+      "loss": 4.0115,
+      "step": 25069
+    },
+    {
+      "epoch": 0.2507,
+      "grad_norm": 1.0581001685939164,
+      "learning_rate": 0.003,
+      "loss": 4.0019,
+      "step": 25070
+    },
+    {
+      "epoch": 0.25071,
+      "grad_norm": 1.2483629391189257,
+      "learning_rate": 0.003,
+      "loss": 3.9796,
+      "step": 25071
+    },
+    {
+      "epoch": 0.25072,
+      "grad_norm": 1.1008380440341259,
+      "learning_rate": 0.003,
+      "loss": 3.9909,
+      "step": 25072
+    },
+    {
+      "epoch": 0.25073,
+      "grad_norm": 1.3226570191471951,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 25073
+    },
+    {
+      "epoch": 0.25074,
+      "grad_norm": 1.0957056478848737,
+      "learning_rate": 0.003,
+      "loss": 4.0287,
+      "step": 25074
+    },
+    {
+      "epoch": 0.25075,
+      "grad_norm": 1.4135190984019916,
+      "learning_rate": 0.003,
+      "loss": 3.965,
+      "step": 25075
+    },
+    {
+      "epoch": 0.25076,
+      "grad_norm": 1.1007445010283923,
+      "learning_rate": 0.003,
+      "loss": 3.9983,
+      "step": 25076
+    },
+    {
+      "epoch": 0.25077,
+      "grad_norm": 1.484491096870048,
+      "learning_rate": 0.003,
+      "loss": 3.9921,
+      "step": 25077
+    },
+    {
+      "epoch": 0.25078,
+      "grad_norm": 1.411550288990779,
+      "learning_rate": 0.003,
+      "loss": 4.0085,
+      "step": 25078
+    },
+    {
+      "epoch": 0.25079,
+      "grad_norm": 1.3934041861581046,
+      "learning_rate": 0.003,
+      "loss": 3.9765,
+      "step": 25079
+    },
+    {
+      "epoch": 0.2508,
+      "grad_norm": 1.1466706727315046,
+      "learning_rate": 0.003,
+      "loss": 3.9953,
+      "step": 25080
+    },
+    {
+      "epoch": 0.25081,
+      "grad_norm": 1.2514075140712781,
+      "learning_rate": 0.003,
+      "loss": 3.9772,
+      "step": 25081
+    },
+    {
+      "epoch": 0.25082,
+      "grad_norm": 1.1267217442791468,
+      "learning_rate": 0.003,
+      "loss": 4.0113,
+      "step": 25082
+    },
+    {
+      "epoch": 0.25083,
+      "grad_norm": 1.4863117364363891,
+      "learning_rate": 0.003,
+      "loss": 3.9928,
+      "step": 25083
+    },
+    {
+      "epoch": 0.25084,
+      "grad_norm": 1.103359801154759,
+      "learning_rate": 0.003,
+      "loss": 3.9935,
+      "step": 25084
+    },
+    {
+      "epoch": 0.25085,
+      "grad_norm": 1.3800309277846032,
+      "learning_rate": 0.003,
+      "loss": 4.0027,
+      "step": 25085
+    },
+    {
+      "epoch": 0.25086,
+      "grad_norm": 1.162740401521776,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 25086
+    },
+    {
+      "epoch": 0.25087,
+      "grad_norm": 1.597006331463267,
+      "learning_rate": 0.003,
+      "loss": 4.0181,
+      "step": 25087
+    },
+    {
+      "epoch": 0.25088,
+      "grad_norm": 1.1027234939442274,
+      "learning_rate": 0.003,
+      "loss": 4.0221,
+      "step": 25088
+    },
+    {
+      "epoch": 0.25089,
+      "grad_norm": 1.3551766858115322,
+      "learning_rate": 0.003,
+      "loss": 4.0147,
+      "step": 25089
+    },
+    {
+      "epoch": 0.2509,
+      "grad_norm": 1.1586851154708737,
+      "learning_rate": 0.003,
+      "loss": 3.9982,
+      "step": 25090
+    },
+    {
+      "epoch": 0.25091,
+      "grad_norm": 1.3349411451456055,
+      "learning_rate": 0.003,
+      "loss": 3.9842,
+      "step": 25091
+    },
+    {
+      "epoch": 0.25092,
+      "grad_norm": 1.3561597720274357,
+      "learning_rate": 0.003,
+      "loss": 4.0271,
+      "step": 25092
+    },
+    {
+      "epoch": 0.25093,
+      "grad_norm": 1.1554365109647118,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 25093
+    },
+    {
+      "epoch": 0.25094,
+      "grad_norm": 1.4190318079556719,
+      "learning_rate": 0.003,
+      "loss": 3.9739,
+      "step": 25094
+    },
+    {
+      "epoch": 0.25095,
+      "grad_norm": 1.1313655810032637,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 25095
+    },
+    {
+      "epoch": 0.25096,
+      "grad_norm": 1.4685216677357762,
+      "learning_rate": 0.003,
+      "loss": 3.9987,
+      "step": 25096
+    },
+    {
+      "epoch": 0.25097,
+      "grad_norm": 1.0373720468829517,
+      "learning_rate": 0.003,
+      "loss": 3.9951,
+      "step": 25097
+    },
+    {
+      "epoch": 0.25098,
+      "grad_norm": 1.6232857301906507,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 25098
+    },
+    {
+      "epoch": 0.25099,
+      "grad_norm": 1.196393702303139,
+      "learning_rate": 0.003,
+      "loss": 3.9624,
+      "step": 25099
+    },
+    {
+      "epoch": 0.251,
+      "grad_norm": 1.3127359106121608,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 25100
+    },
+    {
+      "epoch": 0.25101,
+      "grad_norm": 1.187779498995851,
+      "learning_rate": 0.003,
+      "loss": 3.9979,
+      "step": 25101
+    },
+    {
+      "epoch": 0.25102,
+      "grad_norm": 1.540842955527268,
+      "learning_rate": 0.003,
+      "loss": 3.9668,
+      "step": 25102
+    },
+    {
+      "epoch": 0.25103,
+      "grad_norm": 1.1835194558085675,
+      "learning_rate": 0.003,
+      "loss": 3.9932,
+      "step": 25103
+    },
+    {
+      "epoch": 0.25104,
+      "grad_norm": 1.4018586159666628,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 25104
+    },
+    {
+      "epoch": 0.25105,
+      "grad_norm": 0.9491764613483369,
+      "learning_rate": 0.003,
+      "loss": 4.0116,
+      "step": 25105
+    },
+    {
+      "epoch": 0.25106,
+      "grad_norm": 1.2492174734338268,
+      "learning_rate": 0.003,
+      "loss": 3.9962,
+      "step": 25106
+    },
+    {
+      "epoch": 0.25107,
+      "grad_norm": 1.3414113254751727,
+      "learning_rate": 0.003,
+      "loss": 3.9674,
+      "step": 25107
+    },
+    {
+      "epoch": 0.25108,
+      "grad_norm": 1.2969306780983616,
+      "learning_rate": 0.003,
+      "loss": 3.979,
+      "step": 25108
+    },
+    {
+      "epoch": 0.25109,
+      "grad_norm": 1.1089514722242826,
+      "learning_rate": 0.003,
+      "loss": 3.9878,
+      "step": 25109
+    },
+    {
+      "epoch": 0.2511,
+      "grad_norm": 1.354589217408254,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 25110
+    },
+    {
+      "epoch": 0.25111,
+      "grad_norm": 1.3187397714165523,
+      "learning_rate": 0.003,
+      "loss": 3.9794,
+      "step": 25111
+    },
+    {
+      "epoch": 0.25112,
+      "grad_norm": 1.1377497986390446,
+      "learning_rate": 0.003,
+      "loss": 3.9758,
+      "step": 25112
+    },
+    {
+      "epoch": 0.25113,
+      "grad_norm": 1.4867237558017687,
+      "learning_rate": 0.003,
+      "loss": 4.0329,
+      "step": 25113
+    },
+    {
+      "epoch": 0.25114,
+      "grad_norm": 1.1092503749418137,
+      "learning_rate": 0.003,
+      "loss": 4.0018,
+      "step": 25114
+    },
+    {
+      "epoch": 0.25115,
+      "grad_norm": 1.4369903314914072,
+      "learning_rate": 0.003,
+      "loss": 3.9785,
+      "step": 25115
+    },
+    {
+      "epoch": 0.25116,
+      "grad_norm": 1.0660633646814148,
+      "learning_rate": 0.003,
+      "loss": 3.9745,
+      "step": 25116
+    },
+    {
+      "epoch": 0.25117,
+      "grad_norm": 1.356633713501841,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 25117
+    },
+    {
+      "epoch": 0.25118,
+      "grad_norm": 1.1115858971824126,
+      "learning_rate": 0.003,
+      "loss": 4.0079,
+      "step": 25118
+    },
+    {
+      "epoch": 0.25119,
+      "grad_norm": 1.460476541635725,
+      "learning_rate": 0.003,
+      "loss": 3.9575,
+      "step": 25119
+    },
+    {
+      "epoch": 0.2512,
+      "grad_norm": 1.4003082601877483,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 25120
+    },
+    {
+      "epoch": 0.25121,
+      "grad_norm": 1.427055256843793,
+      "learning_rate": 0.003,
+      "loss": 4.0223,
+      "step": 25121
+    },
+    {
+      "epoch": 0.25122,
+      "grad_norm": 1.1531254619200084,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 25122
+    },
+    {
+      "epoch": 0.25123,
+      "grad_norm": 1.3855664234315315,
+      "learning_rate": 0.003,
+      "loss": 3.9927,
+      "step": 25123
+    },
+    {
+      "epoch": 0.25124,
+      "grad_norm": 1.3301051042753584,
+      "learning_rate": 0.003,
+      "loss": 4.0172,
+      "step": 25124
+    },
+    {
+      "epoch": 0.25125,
+      "grad_norm": 1.0969464788555006,
+      "learning_rate": 0.003,
+      "loss": 4.0108,
+      "step": 25125
+    },
+    {
+      "epoch": 0.25126,
+      "grad_norm": 1.542106184261514,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 25126
+    },
+    {
+      "epoch": 0.25127,
+      "grad_norm": 1.0859914450786448,
+      "learning_rate": 0.003,
+      "loss": 3.9864,
+      "step": 25127
+    },
+    {
+      "epoch": 0.25128,
+      "grad_norm": 1.5682000785268324,
+      "learning_rate": 0.003,
+      "loss": 4.0336,
+      "step": 25128
+    },
+    {
+      "epoch": 0.25129,
+      "grad_norm": 1.1372008139939256,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 25129
+    },
+    {
+      "epoch": 0.2513,
+      "grad_norm": 1.244169546610798,
+      "learning_rate": 0.003,
+      "loss": 3.9882,
+      "step": 25130
+    },
+    {
+      "epoch": 0.25131,
+      "grad_norm": 1.341525172848858,
+      "learning_rate": 0.003,
+      "loss": 4.0179,
+      "step": 25131
+    },
+    {
+      "epoch": 0.25132,
+      "grad_norm": 1.1902965429071324,
+      "learning_rate": 0.003,
+      "loss": 3.9937,
+      "step": 25132
+    },
+    {
+      "epoch": 0.25133,
+      "grad_norm": 1.21664317148608,
+      "learning_rate": 0.003,
+      "loss": 3.9843,
+      "step": 25133
+    },
+    {
+      "epoch": 0.25134,
+      "grad_norm": 1.62935504626529,
+      "learning_rate": 0.003,
+      "loss": 3.9939,
+      "step": 25134
+    },
+    {
+      "epoch": 0.25135,
+      "grad_norm": 0.983262991612043,
+      "learning_rate": 0.003,
+      "loss": 4.0119,
+      "step": 25135
+    },
+    {
+      "epoch": 0.25136,
+      "grad_norm": 1.3668242801694626,
+      "learning_rate": 0.003,
+      "loss": 3.9615,
+      "step": 25136
+    },
+    {
+      "epoch": 0.25137,
+      "grad_norm": 1.0661503263671175,
+      "learning_rate": 0.003,
+      "loss": 4.0075,
+      "step": 25137
+    },
+    {
+      "epoch": 0.25138,
+      "grad_norm": 1.2571871536846977,
+      "learning_rate": 0.003,
+      "loss": 4.0056,
+      "step": 25138
+    },
+    {
+      "epoch": 0.25139,
+      "grad_norm": 1.386189478957809,
+      "learning_rate": 0.003,
+      "loss": 4.0051,
+      "step": 25139
+    },
+    {
+      "epoch": 0.2514,
+      "grad_norm": 1.249122257336315,
+      "learning_rate": 0.003,
+      "loss": 3.9969,
+      "step": 25140
+    },
+    {
+      "epoch": 0.25141,
+      "grad_norm": 1.1965421416930204,
+      "learning_rate": 0.003,
+      "loss": 3.9934,
+      "step": 25141
+    },
+    {
+      "epoch": 0.25142,
+      "grad_norm": 1.1838799472465256,
+      "learning_rate": 0.003,
+      "loss": 3.993,
+      "step": 25142
+    },
+    {
+      "epoch": 0.25143,
+      "grad_norm": 1.3025266597897034,
+      "learning_rate": 0.003,
+      "loss": 4.0,
+      "step": 25143
+    },
+    {
+      "epoch": 0.25144,
+      "grad_norm": 1.18657979146309,
+      "learning_rate": 0.003,
+      "loss": 4.0106,
+      "step": 25144
+    },
+    {
+      "epoch": 0.25145,
+      "grad_norm": 1.3677950663185916,
+      "learning_rate": 0.003,
+      "loss": 4.0086,
+      "step": 25145
+    },
+    {
+      "epoch": 0.25146,
+      "grad_norm": 1.1958745544632032,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 25146
+    },
+    {
+      "epoch": 0.25147,
+      "grad_norm": 1.2946924532072932,
+      "learning_rate": 0.003,
+      "loss": 4.009,
+      "step": 25147
+    },
+    {
+      "epoch": 0.25148,
+      "grad_norm": 1.4322931387944158,
+      "learning_rate": 0.003,
+      "loss": 4.028,
+      "step": 25148
+    },
+    {
+      "epoch": 0.25149,
+      "grad_norm": 1.2096735197208432,
+      "learning_rate": 0.003,
+      "loss": 3.9721,
+      "step": 25149
+    },
+    {
+      "epoch": 0.2515,
+      "grad_norm": 1.512754787657585,
+      "learning_rate": 0.003,
+      "loss": 4.0186,
+      "step": 25150
+    },
+    {
+      "epoch": 0.25151,
+      "grad_norm": 1.0603699377936373,
+      "learning_rate": 0.003,
+      "loss": 4.0047,
+      "step": 25151
+    },
+    {
+      "epoch": 0.25152,
+      "grad_norm": 1.2236630487713256,
+      "learning_rate": 0.003,
+      "loss": 4.0171,
+      "step": 25152
+    },
+    {
+      "epoch": 0.25153,
+      "grad_norm": 1.0697301654769433,
+      "learning_rate": 0.003,
+      "loss": 4.0009,
+      "step": 25153
+    },
+    {
+      "epoch": 0.25154,
+      "grad_norm": 1.455168917947051,
+      "learning_rate": 0.003,
+      "loss": 4.0305,
+      "step": 25154
+    },
+    {
+      "epoch": 0.25155,
+      "grad_norm": 1.0675317463582599,
+      "learning_rate": 0.003,
+      "loss": 4.0102,
+      "step": 25155
+    },
+    {
+      "epoch": 0.25156,
+      "grad_norm": 1.4702213166602978,
+      "learning_rate": 0.003,
+      "loss": 3.9786,
+      "step": 25156
+    },
+    {
+      "epoch": 0.25157,
+      "grad_norm": 0.891599590569262,
+      "learning_rate": 0.003,
+      "loss": 3.9799,
+      "step": 25157
+    },
+    {
+      "epoch": 0.25158,
+      "grad_norm": 1.1900695477136447,
+      "learning_rate": 0.003,
+      "loss": 3.999,
+      "step": 25158
+    },
+    {
+      "epoch": 0.25159,
+      "grad_norm": 1.104095250450303,
+      "learning_rate": 0.003,
+      "loss": 3.9945,
+      "step": 25159
+    },
+    {
+      "epoch": 0.2516,
+      "grad_norm": 1.401518669131215,
+      "learning_rate": 0.003,
+      "loss": 3.9853,
+      "step": 25160
+    },
+    {
+      "epoch": 0.25161,
+      "grad_norm": 1.1850669746182465,
+      "learning_rate": 0.003,
+      "loss": 3.9976,
+      "step": 25161
+    },
+    {
+      "epoch": 0.25162,
+      "grad_norm": 1.458152466301285,
+      "learning_rate": 0.003,
+      "loss": 3.9903,
+      "step": 25162
+    },
+    {
+      "epoch": 0.25163,
+      "grad_norm": 1.3910255815758166,
+      "learning_rate": 0.003,
+      "loss": 4.0066,
+      "step": 25163
+    },
+    {
+      "epoch": 0.25164,
+      "grad_norm": 1.4695814526810207,
+      "learning_rate": 0.003,
+      "loss": 4.0063,
+      "step": 25164
+    },
+    {
+      "epoch": 0.25165,
+      "grad_norm": 1.4813313075226162,
+      "learning_rate": 0.003,
+      "loss": 4.0399,
+      "step": 25165
+    },
+    {
+      "epoch": 0.25166,
+      "grad_norm": 1.0686759109957347,
+      "learning_rate": 0.003,
+      "loss": 3.9948,
+      "step": 25166
+    },
+    {
+      "epoch": 0.25167,
+      "grad_norm": 1.4111212662415484,
+      "learning_rate": 0.003,
+      "loss": 4.015,
+      "step": 25167
+    },
+    {
+      "epoch": 0.25168,
+      "grad_norm": 1.0871740902917189,
+      "learning_rate": 0.003,
+      "loss": 3.9824,
+      "step": 25168
+    },
+    {
+      "epoch": 0.25169,
+      "grad_norm": 1.288619007902377,
+      "learning_rate": 0.003,
+      "loss": 4.0121,
+      "step": 25169
+    },
+    {
+      "epoch": 0.2517,
+      "grad_norm": 1.3197341762503663,
+      "learning_rate": 0.003,
+      "loss": 4.0015,
+      "step": 25170
+    },
+    {
+      "epoch": 0.25171,
+      "grad_norm": 1.458035980076775,
+      "learning_rate": 0.003,
+      "loss": 3.9791,
+      "step": 25171
+    },
+    {
+      "epoch": 0.25172,
+      "grad_norm": 1.477493630679087,
+      "learning_rate": 0.003,
+      "loss": 4.0054,
+      "step": 25172
+    },
+    {
+      "epoch": 0.25173,
+      "grad_norm": 0.8756368242157456,
+      "learning_rate": 0.003,
+      "loss": 3.9476,
+      "step": 25173
+    },
+    {
+      "epoch": 0.25174,
+      "grad_norm": 1.1118569887598118,
+      "learning_rate": 0.003,
+      "loss": 4.0173,
+      "step": 25174
+    },
+    {
+      "epoch": 0.25175,
+      "grad_norm": 1.3639301518925628,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 25175
+    },
+    {
+      "epoch": 0.25176,
+      "grad_norm": 1.213757711184685,
+      "learning_rate": 0.003,
+      "loss": 4.0002,
+      "step": 25176
+    },
+    {
+      "epoch": 0.25177,
+      "grad_norm": 1.6151701647760228,
+      "learning_rate": 0.003,
+      "loss": 3.9915,
+      "step": 25177
+    },
+    {
+      "epoch": 0.25178,
+      "grad_norm": 1.0077703080661762,
+      "learning_rate": 0.003,
+      "loss": 4.0291,
+      "step": 25178
+    },
+    {
+      "epoch": 0.25179,
+      "grad_norm": 1.539495128379226,
+      "learning_rate": 0.003,
+      "loss": 4.0057,
+      "step": 25179
+    },
+    {
+      "epoch": 0.2518,
+      "grad_norm": 1.0152261527528479,
+      "learning_rate": 0.003,
+      "loss": 4.0257,
+      "step": 25180
+    },
+    {
+      "epoch": 0.25181,
+      "grad_norm": 1.5112433044823934,
+      "learning_rate": 0.003,
+      "loss": 3.9906,
+      "step": 25181
+    },
+    {
+      "epoch": 0.25182,
+      "grad_norm": 1.0284760943801041,
+      "learning_rate": 0.003,
+      "loss": 4.0441,
+      "step": 25182
+    },
+    {
+      "epoch": 0.25183,
+      "grad_norm": 1.57293651810338,
+      "learning_rate": 0.003,
+      "loss": 4.0129,
+      "step": 25183
+    },
+    {
+      "epoch": 0.25184,
+      "grad_norm": 1.2385443670694518,
+      "learning_rate": 0.003,
+      "loss": 3.9965,
+      "step": 25184
+    },
+    {
+      "epoch": 0.25185,
+      "grad_norm": 1.2218602291849459,
+      "learning_rate": 0.003,
+      "loss": 3.9808,
+      "step": 25185
+    },
+    {
+      "epoch": 0.25186,
+      "grad_norm": 1.3207421638472514,
+      "learning_rate": 0.003,
+      "loss": 4.0004,
+      "step": 25186
+    },
+    {
+      "epoch": 0.25187,
+      "grad_norm": 1.255934901484141,
+      "learning_rate": 0.003,
+      "loss": 4.0303,
+      "step": 25187
+    },
+    {
+      "epoch": 0.25188,
+      "grad_norm": 1.3882667359737606,
+      "learning_rate": 0.003,
+      "loss": 3.9913,
+      "step": 25188
+    },
+    {
+      "epoch": 0.25189,
+      "grad_norm": 1.0776369648865534,
+      "learning_rate": 0.003,
+      "loss": 4.0424,
+      "step": 25189
+    },
+    {
+      "epoch": 0.2519,
+      "grad_norm": 1.5583511505853782,
+      "learning_rate": 0.003,
+      "loss": 4.0037,
+      "step": 25190
+    },
+    {
+      "epoch": 0.25191,
+      "grad_norm": 1.16380485712266,
+      "learning_rate": 0.003,
+      "loss": 3.9966,
+      "step": 25191
+    },
+    {
+      "epoch": 0.25192,
+      "grad_norm": 1.4706175497023224,
+      "learning_rate": 0.003,
+      "loss": 3.9908,
+      "step": 25192
+    },
+    {
+      "epoch": 0.25193,
+      "grad_norm": 1.1624328636772914,
+      "learning_rate": 0.003,
+      "loss": 3.9833,
+      "step": 25193
+    },
+    {
+      "epoch": 0.25194,
+      "grad_norm": 1.3635317356797707,
+      "learning_rate": 0.003,
+      "loss": 3.971,
+      "step": 25194
+    },
+    {
+      "epoch": 0.25195,
+      "grad_norm": 1.3668509878113857,
+      "learning_rate": 0.003,
+      "loss": 4.0132,
+      "step": 25195
+    },
+    {
+      "epoch": 0.25196,
+      "grad_norm": 1.323053035960398,
+      "learning_rate": 0.003,
+      "loss": 4.0014,
+      "step": 25196
+    },
+    {
+      "epoch": 0.25197,
+      "grad_norm": 1.4247377160347867,
+      "learning_rate": 0.003,
+      "loss": 3.9869,
+      "step": 25197
+    },
+    {
+      "epoch": 0.25198,
+      "grad_norm": 1.1380930403120582,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 25198
+    },
+    {
+      "epoch": 0.25199,
+      "grad_norm": 1.289942416828056,
+      "learning_rate": 0.003,
+      "loss": 3.9701,
+      "step": 25199
+    },
+    {
+      "epoch": 0.252,
+      "grad_norm": 1.3214210308825594,
+      "learning_rate": 0.003,
+      "loss": 4.0059,
+      "step": 25200
+    },
+    {
+      "epoch": 0.25201,
+      "grad_norm": 1.243204705905994,
+      "learning_rate": 0.003,
+      "loss": 3.9963,
+      "step": 25201
+    },
+    {
+      "epoch": 0.25202,
+      "grad_norm": 1.336410204260555,
+      "learning_rate": 0.003,
+      "loss": 3.9815,
+      "step": 25202
+    },
+    {
+      "epoch": 0.25203,
+      "grad_norm": 0.9210018587813728,
+      "learning_rate": 0.003,
+      "loss": 3.9548,
+      "step": 25203
+    },
+    {
+      "epoch": 0.25204,
+      "grad_norm": 1.3932379562169532,
+      "learning_rate": 0.003,
+      "loss": 4.0082,
+      "step": 25204
+    },
+    {
+      "epoch": 0.25205,
+      "grad_norm": 1.3422730554155937,
+      "learning_rate": 0.003,
+      "loss": 3.9825,
+      "step": 25205
+    },
+    {
+      "epoch": 0.25206,
+      "grad_norm": 1.4678703839188953,
+      "learning_rate": 0.003,
+      "loss": 4.0038,
+      "step": 25206
+    },
+    {
+      "epoch": 0.25207,
+      "grad_norm": 1.2092573996741576,
+      "learning_rate": 0.003,
+      "loss": 3.9889,
+      "step": 25207
+    },
+    {
+      "epoch": 0.25208,
+      "grad_norm": 1.2443302042790751,
+      "learning_rate": 0.003,
+      "loss": 3.9917,
+      "step": 25208
+    },
+    {
+      "epoch": 0.25209,
+      "grad_norm": 1.2473350723406134,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 25209
+    },
+    {
+      "epoch": 0.2521,
+      "grad_norm": 1.2570284981185613,
+      "learning_rate": 0.003,
+      "loss": 3.9994,
+      "step": 25210
+    },
+    {
+      "epoch": 0.25211,
+      "grad_norm": 1.320906335009051,
+      "learning_rate": 0.003,
+      "loss": 4.0134,
+      "step": 25211
+    },
+    {
+      "epoch": 0.25212,
+      "grad_norm": 1.0999760495138171,
+      "learning_rate": 0.003,
+      "loss": 3.981,
+      "step": 25212
+    },
+    {
+      "epoch": 0.25213,
+      "grad_norm": 1.3819490699103154,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 25213
+    },
+    {
+      "epoch": 0.25214,
+      "grad_norm": 1.4425082634587052,
+      "learning_rate": 0.003,
+      "loss": 3.9751,
+      "step": 25214
+    },
+    {
+      "epoch": 0.25215,
+      "grad_norm": 1.1558580715538946,
+      "learning_rate": 0.003,
+      "loss": 4.0005,
+      "step": 25215
+    },
+    {
+      "epoch": 0.25216,
+      "grad_norm": 1.1625197861709324,
+      "learning_rate": 0.003,
+      "loss": 4.0089,
+      "step": 25216
+    },
+    {
+      "epoch": 0.25217,
+      "grad_norm": 1.2875101162013372,
+      "learning_rate": 0.003,
+      "loss": 3.9683,
+      "step": 25217
+    },
+    {
+      "epoch": 0.25218,
+      "grad_norm": 1.3339198932884528,
+      "learning_rate": 0.003,
+      "loss": 4.008,
+      "step": 25218
+    },
+    {
+      "epoch": 0.25219,
+      "grad_norm": 1.1188015534506348,
+      "learning_rate": 0.003,
+      "loss": 3.984,
+      "step": 25219
+    },
+    {
+      "epoch": 0.2522,
+      "grad_norm": 1.2921078280968838,
+      "learning_rate": 0.003,
+      "loss": 4.0039,
+      "step": 25220
+    },
+    {
+      "epoch": 0.25221,
+      "grad_norm": 1.156254608061559,
+      "learning_rate": 0.003,
+      "loss": 4.0419,
+      "step": 25221
+    },
+    {
+      "epoch": 0.25222,
+      "grad_norm": 1.3518333750012181,
+      "learning_rate": 0.003,
+      "loss": 4.0208,
+      "step": 25222
+    },
+    {
+      "epoch": 0.25223,
+      "grad_norm": 1.2759068376304437,
+      "learning_rate": 0.003,
+      "loss": 3.991,
+      "step": 25223
+    },
+    {
+      "epoch": 0.25224,
+      "grad_norm": 1.3471110223508431,
+      "learning_rate": 0.003,
+      "loss": 3.9992,
+      "step": 25224
+    },
+    {
+      "epoch": 0.25225,
+      "grad_norm": 1.0440758873330132,
+      "learning_rate": 0.003,
+      "loss": 3.9773,
+      "step": 25225
+    },
+    {
+      "epoch": 0.25226,
+      "grad_norm": 1.5059581408859932,
+      "learning_rate": 0.003,
+      "loss": 4.0391,
+      "step": 25226
+    },
+    {
+      "epoch": 0.25227,
+      "grad_norm": 1.4255770268044303,
+      "learning_rate": 0.003,
+      "loss": 4.0133,
+      "step": 25227
+    },
+    {
+      "epoch": 0.25228,
+      "grad_norm": 1.187510585857748,
+      "learning_rate": 0.003,
+      "loss": 3.9952,
+      "step": 25228
+    },
+    {
+      "epoch": 0.25229,
+      "grad_norm": 1.354767786105194,
+      "learning_rate": 0.003,
+      "loss": 3.9925,
+      "step": 25229
+    },
+    {
+      "epoch": 0.2523,
+      "grad_norm": 1.2678444760428609,
+      "learning_rate": 0.003,
+      "loss": 3.9893,
+      "step": 25230
+    },
+    {
+      "epoch": 0.25231,
+      "grad_norm": 1.398992925007705,
+      "learning_rate": 0.003,
+      "loss": 3.9798,
+      "step": 25231
+    },
+    {
+      "epoch": 0.25232,
+      "grad_norm": 1.1191735481890759,
+      "learning_rate": 0.003,
+      "loss": 4.0028,
+      "step": 25232
+    },
+    {
+      "epoch": 0.25233,
+      "grad_norm": 1.3296097117464747,
+      "learning_rate": 0.003,
+      "loss": 4.0215,
+      "step": 25233
+    },
+    {
+      "epoch": 0.25234,
+      "grad_norm": 1.1753424895595677,
+      "learning_rate": 0.003,
+      "loss": 4.0101,
+      "step": 25234
+    },
+    {
+      "epoch": 0.25235,
+      "grad_norm": 1.2844521487017182,
+      "learning_rate": 0.003,
+      "loss": 3.9836,
+      "step": 25235
+    },
+    {
+      "epoch": 0.25236,
+      "grad_norm": 1.1030741763335932,
+      "learning_rate": 0.003,
+      "loss": 4.0213,
+      "step": 25236
+    },
+    {
+      "epoch": 0.25237,
+      "grad_norm": 1.4607762696718185,
+      "learning_rate": 0.003,
+      "loss": 3.9845,
+      "step": 25237
+    },
+    {
+      "epoch": 0.25238,
+      "grad_norm": 0.975044965436386,
+      "learning_rate": 0.003,
+      "loss": 3.9662,
+      "step": 25238
+    },
+    {
+      "epoch": 0.25239,
+      "grad_norm": 1.509939768568235,
+      "learning_rate": 0.003,
+      "loss": 3.9885,
+      "step": 25239
+    },
+    {
+      "epoch": 0.2524,
+      "grad_norm": 1.131897100904184,
+      "learning_rate": 0.003,
+      "loss": 4.0127,
+      "step": 25240
+    },
+    {
+      "epoch": 0.25241,
+      "grad_norm": 1.4383373199217495,
+      "learning_rate": 0.003,
+      "loss": 4.0094,
+      "step": 25241
+    },
+    {
+      "epoch": 0.25242,
+      "grad_norm": 1.0222182551390455,
+      "learning_rate": 0.003,
+      "loss": 3.9757,
+      "step": 25242
+    },
+    {
+      "epoch": 0.25243,
+      "grad_norm": 1.3328058165199228,
+      "learning_rate": 0.003,
+      "loss": 4.0033,
+      "step": 25243
+    },
+    {
+      "epoch": 0.25244,
+      "grad_norm": 1.276850400040123,
+      "learning_rate": 0.003,
+      "loss": 4.001,
+      "step": 25244
+    },
+    {
+      "epoch": 0.25245,
+      "grad_norm": 1.3321503530579146,
+      "learning_rate": 0.003,
+      "loss": 4.0058,
+      "step": 25245
+    },
+    {
+      "epoch": 0.25246,
+      "grad_norm": 1.0332754443377115,
+      "learning_rate": 0.003,
+      "loss": 3.9697,
+      "step": 25246
+    },
+    {
+      "epoch": 0.25247,
+      "grad_norm": 1.3915364950866893,
+      "learning_rate": 0.003,
+      "loss": 4.0164,
+      "step": 25247
+    },
+    {
+      "epoch": 0.25248,
+      "grad_norm": 1.1512537522208093,
+      "learning_rate": 0.003,
+      "loss": 3.9763,
+      "step": 25248
+    },
+    {
+      "epoch": 0.25249,
+      "grad_norm": 1.5845508683399963,
+      "learning_rate": 0.003,
+      "loss": 4.0068,
+      "step": 25249
+    },
+    {
+      "epoch": 0.2525,
+      "grad_norm": 1.3607202272410714,
+      "learning_rate": 0.003,
+      "loss": 3.9902,
+      "step": 25250
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 100000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 25,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.001001982099456e+18,
+  "train_batch_size": 256,
+  "trial_name": null,
+  "trial_params": null
+}