{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1e-05, "grad_norm": 1.085229352812366, "learning_rate": 3e-06, "loss": 10.849, "step": 1 }, { "epoch": 2e-05, "grad_norm": 1.0764689186661929, "learning_rate": 6e-06, "loss": 10.8489, "step": 2 }, { "epoch": 3e-05, "grad_norm": 1.0926036068515363, "learning_rate": 9e-06, "loss": 10.8486, "step": 3 }, { "epoch": 4e-05, "grad_norm": 1.0859011783792423, "learning_rate": 1.2e-05, "loss": 10.848, "step": 4 }, { "epoch": 5e-05, "grad_norm": 1.0906873388641662, "learning_rate": 1.5e-05, "loss": 10.8453, "step": 5 }, { "epoch": 6e-05, "grad_norm": 1.0895888734627917, "learning_rate": 1.8e-05, "loss": 10.8447, "step": 6 }, { "epoch": 7e-05, "grad_norm": 1.0913329404483254, "learning_rate": 2.1000000000000002e-05, "loss": 10.8355, "step": 7 }, { "epoch": 8e-05, "grad_norm": 1.0766237663279077, "learning_rate": 2.4e-05, "loss": 10.8141, "step": 8 }, { "epoch": 9e-05, "grad_norm": 1.0617425338278697, "learning_rate": 2.7e-05, "loss": 10.8099, "step": 9 }, { "epoch": 0.0001, "grad_norm": 1.080271312928173, "learning_rate": 3e-05, "loss": 10.7983, "step": 10 }, { "epoch": 0.00011, "grad_norm": 1.0663119503184246, "learning_rate": 3.2999999999999996e-05, "loss": 10.7844, "step": 11 }, { "epoch": 0.00012, "grad_norm": 1.0694505502575085, "learning_rate": 3.6e-05, "loss": 10.7739, "step": 12 }, { "epoch": 0.00013, "grad_norm": 1.0509206118753271, "learning_rate": 3.9e-05, "loss": 10.7554, "step": 13 }, { "epoch": 0.00014, "grad_norm": 1.0427932624919014, "learning_rate": 4.2000000000000004e-05, "loss": 10.7452, "step": 14 }, { "epoch": 0.00015, "grad_norm": 1.0323600599358198, "learning_rate": 4.4999999999999996e-05, "loss": 10.736, "step": 15 }, { "epoch": 0.00016, "grad_norm": 1.0039727604034705, "learning_rate": 4.8e-05, "loss": 10.7191, "step": 16 }, { "epoch": 0.00017, "grad_norm": 0.9736033767894897, "learning_rate": 5.1000000000000006e-05, "loss": 10.7049, "step": 17 }, { "epoch": 0.00018, "grad_norm": 0.9665971933165531, "learning_rate": 5.4e-05, "loss": 10.69, "step": 18 }, { "epoch": 0.00019, "grad_norm": 0.9428089164290921, "learning_rate": 5.7e-05, "loss": 10.6767, "step": 19 }, { "epoch": 0.0002, "grad_norm": 0.943192779381936, "learning_rate": 6e-05, "loss": 10.666, "step": 20 }, { "epoch": 0.00021, "grad_norm": 0.9246956411226879, "learning_rate": 6.3e-05, "loss": 10.6525, "step": 21 }, { "epoch": 0.00022, "grad_norm": 0.9206776841693647, "learning_rate": 6.599999999999999e-05, "loss": 10.6378, "step": 22 }, { "epoch": 0.00023, "grad_norm": 0.9152142474292139, "learning_rate": 6.9e-05, "loss": 10.626, "step": 23 }, { "epoch": 0.00024, "grad_norm": 0.9101286615991204, "learning_rate": 7.2e-05, "loss": 10.613, "step": 24 }, { "epoch": 0.00025, "grad_norm": 0.9100685942512898, "learning_rate": 7.500000000000001e-05, "loss": 10.5998, "step": 25 }, { "epoch": 0.00026, "grad_norm": 0.9093157110032968, "learning_rate": 7.8e-05, "loss": 10.5859, "step": 26 }, { "epoch": 0.00027, "grad_norm": 0.9036082412984342, "learning_rate": 8.1e-05, "loss": 10.5742, "step": 27 }, { "epoch": 0.00028, "grad_norm": 0.9033464449252919, "learning_rate": 8.400000000000001e-05, "loss": 10.5612, "step": 28 }, { "epoch": 0.00029, "grad_norm": 0.9067163029768021, "learning_rate": 8.7e-05, "loss": 10.5465, "step": 29 }, { "epoch": 0.0003, "grad_norm": 0.9152916501496612, "learning_rate": 8.999999999999999e-05, "loss": 10.5321, "step": 30 }, { "epoch": 0.00031, "grad_norm": 0.9122391999445313, "learning_rate": 9.3e-05, "loss": 10.5167, "step": 31 }, { "epoch": 0.00032, "grad_norm": 0.9090668181794543, "learning_rate": 9.6e-05, "loss": 10.5034, "step": 32 }, { "epoch": 0.00033, "grad_norm": 0.9095629132760054, "learning_rate": 9.900000000000001e-05, "loss": 10.4865, "step": 33 }, { "epoch": 0.00034, "grad_norm": 0.9107692784496799, "learning_rate": 0.00010200000000000001, "loss": 10.4706, "step": 34 }, { "epoch": 0.00035, "grad_norm": 0.9162740070354684, "learning_rate": 0.00010500000000000002, "loss": 10.4535, "step": 35 }, { "epoch": 0.00036, "grad_norm": 0.9114324016484531, "learning_rate": 0.000108, "loss": 10.4367, "step": 36 }, { "epoch": 0.00037, "grad_norm": 0.901331547883087, "learning_rate": 0.000111, "loss": 10.4196, "step": 37 }, { "epoch": 0.00038, "grad_norm": 0.9056352937834914, "learning_rate": 0.000114, "loss": 10.4003, "step": 38 }, { "epoch": 0.00039, "grad_norm": 0.9071824304759276, "learning_rate": 0.000117, "loss": 10.3815, "step": 39 }, { "epoch": 0.0004, "grad_norm": 0.911150856035614, "learning_rate": 0.00012, "loss": 10.3599, "step": 40 }, { "epoch": 0.00041, "grad_norm": 0.9001357892993758, "learning_rate": 0.000123, "loss": 10.3411, "step": 41 }, { "epoch": 0.00042, "grad_norm": 0.9057215347961051, "learning_rate": 0.000126, "loss": 10.3208, "step": 42 }, { "epoch": 0.00043, "grad_norm": 0.9101872556211666, "learning_rate": 0.000129, "loss": 10.2978, "step": 43 }, { "epoch": 0.00044, "grad_norm": 0.9097345366629672, "learning_rate": 0.00013199999999999998, "loss": 10.2768, "step": 44 }, { "epoch": 0.00045, "grad_norm": 0.9111670268341607, "learning_rate": 0.000135, "loss": 10.2539, "step": 45 }, { "epoch": 0.00046, "grad_norm": 0.9030473796809102, "learning_rate": 0.000138, "loss": 10.2322, "step": 46 }, { "epoch": 0.00047, "grad_norm": 0.9086055117133346, "learning_rate": 0.000141, "loss": 10.2064, "step": 47 }, { "epoch": 0.00048, "grad_norm": 0.9060414917101882, "learning_rate": 0.000144, "loss": 10.1837, "step": 48 }, { "epoch": 0.00049, "grad_norm": 0.9059366436676172, "learning_rate": 0.000147, "loss": 10.1599, "step": 49 }, { "epoch": 0.0005, "grad_norm": 0.9106165244124662, "learning_rate": 0.00015000000000000001, "loss": 10.1343, "step": 50 }, { "epoch": 0.00051, "grad_norm": 0.9110992341155927, "learning_rate": 0.000153, "loss": 10.1078, "step": 51 }, { "epoch": 0.00052, "grad_norm": 0.9077294523989683, "learning_rate": 0.000156, "loss": 10.0815, "step": 52 }, { "epoch": 0.00053, "grad_norm": 0.8995624264210066, "learning_rate": 0.000159, "loss": 10.0581, "step": 53 }, { "epoch": 0.00054, "grad_norm": 0.9187536344258231, "learning_rate": 0.000162, "loss": 10.026, "step": 54 }, { "epoch": 0.00055, "grad_norm": 0.9102867236672618, "learning_rate": 0.000165, "loss": 10.0019, "step": 55 }, { "epoch": 0.00056, "grad_norm": 0.9031170239719724, "learning_rate": 0.00016800000000000002, "loss": 9.9743, "step": 56 }, { "epoch": 0.00057, "grad_norm": 0.9090239107255728, "learning_rate": 0.000171, "loss": 9.9467, "step": 57 }, { "epoch": 0.00058, "grad_norm": 0.9070896749665766, "learning_rate": 0.000174, "loss": 9.9223, "step": 58 }, { "epoch": 0.00059, "grad_norm": 0.9116582783399498, "learning_rate": 0.000177, "loss": 9.8905, "step": 59 }, { "epoch": 0.0006, "grad_norm": 0.899071209460366, "learning_rate": 0.00017999999999999998, "loss": 9.8642, "step": 60 }, { "epoch": 0.00061, "grad_norm": 0.8984032767613607, "learning_rate": 0.000183, "loss": 9.8367, "step": 61 }, { "epoch": 0.00062, "grad_norm": 0.9043779152804675, "learning_rate": 0.000186, "loss": 9.8058, "step": 62 }, { "epoch": 0.00063, "grad_norm": 0.8969230453763369, "learning_rate": 0.000189, "loss": 9.7784, "step": 63 }, { "epoch": 0.00064, "grad_norm": 0.8929896633083918, "learning_rate": 0.000192, "loss": 9.7485, "step": 64 }, { "epoch": 0.00065, "grad_norm": 0.9016512171852502, "learning_rate": 0.00019500000000000002, "loss": 9.7165, "step": 65 }, { "epoch": 0.00066, "grad_norm": 0.8946267084059, "learning_rate": 0.00019800000000000002, "loss": 9.6927, "step": 66 }, { "epoch": 0.00067, "grad_norm": 0.8963872265737496, "learning_rate": 0.000201, "loss": 9.6633, "step": 67 }, { "epoch": 0.00068, "grad_norm": 0.9035635569767697, "learning_rate": 0.00020400000000000003, "loss": 9.6313, "step": 68 }, { "epoch": 0.00069, "grad_norm": 0.8884430485081615, "learning_rate": 0.00020700000000000002, "loss": 9.604, "step": 69 }, { "epoch": 0.0007, "grad_norm": 0.901710386427562, "learning_rate": 0.00021000000000000004, "loss": 9.5675, "step": 70 }, { "epoch": 0.00071, "grad_norm": 0.8913815692585527, "learning_rate": 0.00021299999999999997, "loss": 9.5403, "step": 71 }, { "epoch": 0.00072, "grad_norm": 0.8947982003050186, "learning_rate": 0.000216, "loss": 9.5138, "step": 72 }, { "epoch": 0.00073, "grad_norm": 0.8936046055705469, "learning_rate": 0.00021899999999999998, "loss": 9.4802, "step": 73 }, { "epoch": 0.00074, "grad_norm": 0.894403551075387, "learning_rate": 0.000222, "loss": 9.4539, "step": 74 }, { "epoch": 0.00075, "grad_norm": 0.8897139679621429, "learning_rate": 0.000225, "loss": 9.4187, "step": 75 }, { "epoch": 0.00076, "grad_norm": 0.89755782108852, "learning_rate": 0.000228, "loss": 9.3929, "step": 76 }, { "epoch": 0.00077, "grad_norm": 0.8917412779828411, "learning_rate": 0.000231, "loss": 9.3649, "step": 77 }, { "epoch": 0.00078, "grad_norm": 0.886930008239094, "learning_rate": 0.000234, "loss": 9.3351, "step": 78 }, { "epoch": 0.00079, "grad_norm": 0.8893115023720741, "learning_rate": 0.00023700000000000001, "loss": 9.2992, "step": 79 }, { "epoch": 0.0008, "grad_norm": 0.8917772276535281, "learning_rate": 0.00024, "loss": 9.2632, "step": 80 }, { "epoch": 0.00081, "grad_norm": 0.8971231461853929, "learning_rate": 0.00024300000000000002, "loss": 9.2316, "step": 81 }, { "epoch": 0.00082, "grad_norm": 0.9071155901294882, "learning_rate": 0.000246, "loss": 9.206, "step": 82 }, { "epoch": 0.00083, "grad_norm": 0.8963243651316662, "learning_rate": 0.00024900000000000004, "loss": 9.1708, "step": 83 }, { "epoch": 0.00084, "grad_norm": 0.9002519637093493, "learning_rate": 0.000252, "loss": 9.1395, "step": 84 }, { "epoch": 0.00085, "grad_norm": 0.8962870771351267, "learning_rate": 0.000255, "loss": 9.1159, "step": 85 }, { "epoch": 0.00086, "grad_norm": 0.8973951423301171, "learning_rate": 0.000258, "loss": 9.0799, "step": 86 }, { "epoch": 0.00087, "grad_norm": 0.8894399774297843, "learning_rate": 0.000261, "loss": 9.0571, "step": 87 }, { "epoch": 0.00088, "grad_norm": 0.8954974565265822, "learning_rate": 0.00026399999999999997, "loss": 9.0211, "step": 88 }, { "epoch": 0.00089, "grad_norm": 0.8903887799592864, "learning_rate": 0.000267, "loss": 8.9943, "step": 89 }, { "epoch": 0.0009, "grad_norm": 0.8896831425922581, "learning_rate": 0.00027, "loss": 8.9666, "step": 90 }, { "epoch": 0.00091, "grad_norm": 0.8841935030693385, "learning_rate": 0.000273, "loss": 8.9365, "step": 91 }, { "epoch": 0.00092, "grad_norm": 0.8862593694142118, "learning_rate": 0.000276, "loss": 8.9052, "step": 92 }, { "epoch": 0.00093, "grad_norm": 0.8881041222250594, "learning_rate": 0.000279, "loss": 8.8752, "step": 93 }, { "epoch": 0.00094, "grad_norm": 0.8868931156100198, "learning_rate": 0.000282, "loss": 8.8494, "step": 94 }, { "epoch": 0.00095, "grad_norm": 0.8826333164427848, "learning_rate": 0.000285, "loss": 8.8203, "step": 95 }, { "epoch": 0.00096, "grad_norm": 0.8910955494857569, "learning_rate": 0.000288, "loss": 8.7905, "step": 96 }, { "epoch": 0.00097, "grad_norm": 0.8809279978723125, "learning_rate": 0.000291, "loss": 8.7668, "step": 97 }, { "epoch": 0.00098, "grad_norm": 0.8829202734417477, "learning_rate": 0.000294, "loss": 8.73, "step": 98 }, { "epoch": 0.00099, "grad_norm": 0.8858154061322314, "learning_rate": 0.000297, "loss": 8.7072, "step": 99 }, { "epoch": 0.001, "grad_norm": 0.8861735275161908, "learning_rate": 0.00030000000000000003, "loss": 8.6806, "step": 100 }, { "epoch": 0.00101, "grad_norm": 0.8886468466177152, "learning_rate": 0.00030300000000000005, "loss": 8.6455, "step": 101 }, { "epoch": 0.00102, "grad_norm": 0.8755547614914917, "learning_rate": 0.000306, "loss": 8.625, "step": 102 }, { "epoch": 0.00103, "grad_norm": 0.879237266457317, "learning_rate": 0.000309, "loss": 8.6003, "step": 103 }, { "epoch": 0.00104, "grad_norm": 0.8697854479956653, "learning_rate": 0.000312, "loss": 8.5786, "step": 104 }, { "epoch": 0.00105, "grad_norm": 0.8733949043334459, "learning_rate": 0.000315, "loss": 8.552, "step": 105 }, { "epoch": 0.00106, "grad_norm": 0.8706148334292045, "learning_rate": 0.000318, "loss": 8.5224, "step": 106 }, { "epoch": 0.00107, "grad_norm": 0.8653683244965015, "learning_rate": 0.000321, "loss": 8.4981, "step": 107 }, { "epoch": 0.00108, "grad_norm": 0.8737239863866451, "learning_rate": 0.000324, "loss": 8.4698, "step": 108 }, { "epoch": 0.00109, "grad_norm": 0.8706512729216435, "learning_rate": 0.000327, "loss": 8.4501, "step": 109 }, { "epoch": 0.0011, "grad_norm": 0.8842127152624679, "learning_rate": 0.00033, "loss": 8.4274, "step": 110 }, { "epoch": 0.00111, "grad_norm": 0.9007754832304464, "learning_rate": 0.000333, "loss": 8.3985, "step": 111 }, { "epoch": 0.00112, "grad_norm": 0.9159855921530741, "learning_rate": 0.00033600000000000004, "loss": 8.3784, "step": 112 }, { "epoch": 0.00113, "grad_norm": 0.9063278036144603, "learning_rate": 0.000339, "loss": 8.3391, "step": 113 }, { "epoch": 0.00114, "grad_norm": 0.8437820836704115, "learning_rate": 0.000342, "loss": 8.3286, "step": 114 }, { "epoch": 0.00115, "grad_norm": 0.8612821674982505, "learning_rate": 0.00034500000000000004, "loss": 8.3022, "step": 115 }, { "epoch": 0.00116, "grad_norm": 0.8581012057508914, "learning_rate": 0.000348, "loss": 8.2786, "step": 116 }, { "epoch": 0.00117, "grad_norm": 0.834681162463853, "learning_rate": 0.000351, "loss": 8.2472, "step": 117 }, { "epoch": 0.00118, "grad_norm": 0.8526508674143746, "learning_rate": 0.000354, "loss": 8.2312, "step": 118 }, { "epoch": 0.00119, "grad_norm": 0.8471842679679056, "learning_rate": 0.000357, "loss": 8.2186, "step": 119 }, { "epoch": 0.0012, "grad_norm": 0.8238029079166322, "learning_rate": 0.00035999999999999997, "loss": 8.1917, "step": 120 }, { "epoch": 0.00121, "grad_norm": 0.832770075662114, "learning_rate": 0.000363, "loss": 8.1641, "step": 121 }, { "epoch": 0.00122, "grad_norm": 0.8176689288160716, "learning_rate": 0.000366, "loss": 8.1471, "step": 122 }, { "epoch": 0.00123, "grad_norm": 0.8121975389696077, "learning_rate": 0.000369, "loss": 8.1243, "step": 123 }, { "epoch": 0.00124, "grad_norm": 0.8287555396618358, "learning_rate": 0.000372, "loss": 8.1037, "step": 124 }, { "epoch": 0.00125, "grad_norm": 0.8387006564379252, "learning_rate": 0.000375, "loss": 8.0802, "step": 125 }, { "epoch": 0.00126, "grad_norm": 0.7985855201767323, "learning_rate": 0.000378, "loss": 8.0672, "step": 126 }, { "epoch": 0.00127, "grad_norm": 0.7830434817433392, "learning_rate": 0.000381, "loss": 8.0437, "step": 127 }, { "epoch": 0.00128, "grad_norm": 0.8466276209824322, "learning_rate": 0.000384, "loss": 8.0264, "step": 128 }, { "epoch": 0.00129, "grad_norm": 1.1259201196462498, "learning_rate": 0.00038700000000000003, "loss": 8.0199, "step": 129 }, { "epoch": 0.0013, "grad_norm": 1.356061683657538, "learning_rate": 0.00039000000000000005, "loss": 8.0054, "step": 130 }, { "epoch": 0.00131, "grad_norm": 0.781068809712636, "learning_rate": 0.000393, "loss": 7.9576, "step": 131 }, { "epoch": 0.00132, "grad_norm": 1.0367188815921278, "learning_rate": 0.00039600000000000003, "loss": 7.9591, "step": 132 }, { "epoch": 0.00133, "grad_norm": 0.7841067594214959, "learning_rate": 0.00039900000000000005, "loss": 7.9293, "step": 133 }, { "epoch": 0.00134, "grad_norm": 0.8594113418007541, "learning_rate": 0.000402, "loss": 7.9217, "step": 134 }, { "epoch": 0.00135, "grad_norm": 0.7468719464596628, "learning_rate": 0.00040500000000000003, "loss": 7.8883, "step": 135 }, { "epoch": 0.00136, "grad_norm": 0.8415432392576198, "learning_rate": 0.00040800000000000005, "loss": 7.8771, "step": 136 }, { "epoch": 0.00137, "grad_norm": 0.7386877288273068, "learning_rate": 0.000411, "loss": 7.8542, "step": 137 }, { "epoch": 0.00138, "grad_norm": 0.7450176106214967, "learning_rate": 0.00041400000000000003, "loss": 7.8322, "step": 138 }, { "epoch": 0.00139, "grad_norm": 0.7424698225823185, "learning_rate": 0.00041700000000000005, "loss": 7.8197, "step": 139 }, { "epoch": 0.0014, "grad_norm": 0.7268306219948636, "learning_rate": 0.00042000000000000007, "loss": 7.8018, "step": 140 }, { "epoch": 0.00141, "grad_norm": 0.7115037341291065, "learning_rate": 0.000423, "loss": 7.7905, "step": 141 }, { "epoch": 0.00142, "grad_norm": 0.6725409058271569, "learning_rate": 0.00042599999999999995, "loss": 7.7772, "step": 142 }, { "epoch": 0.00143, "grad_norm": 0.6830809074405504, "learning_rate": 0.00042899999999999997, "loss": 7.7496, "step": 143 }, { "epoch": 0.00144, "grad_norm": 0.6791849969278475, "learning_rate": 0.000432, "loss": 7.7318, "step": 144 }, { "epoch": 0.00145, "grad_norm": 0.6462817248800249, "learning_rate": 0.000435, "loss": 7.7251, "step": 145 }, { "epoch": 0.00146, "grad_norm": 0.6695930969912641, "learning_rate": 0.00043799999999999997, "loss": 7.6893, "step": 146 }, { "epoch": 0.00147, "grad_norm": 0.7012896651032599, "learning_rate": 0.000441, "loss": 7.6817, "step": 147 }, { "epoch": 0.00148, "grad_norm": 0.7755580877429182, "learning_rate": 0.000444, "loss": 7.6698, "step": 148 }, { "epoch": 0.00149, "grad_norm": 1.088947674236225, "learning_rate": 0.00044699999999999997, "loss": 7.6713, "step": 149 }, { "epoch": 0.0015, "grad_norm": 0.9396541473912592, "learning_rate": 0.00045, "loss": 7.6436, "step": 150 }, { "epoch": 0.00151, "grad_norm": 0.640217037835256, "learning_rate": 0.000453, "loss": 7.6133, "step": 151 }, { "epoch": 0.00152, "grad_norm": 0.9098496631236208, "learning_rate": 0.000456, "loss": 7.6149, "step": 152 }, { "epoch": 0.00153, "grad_norm": 0.6175759444520236, "learning_rate": 0.000459, "loss": 7.5962, "step": 153 }, { "epoch": 0.00154, "grad_norm": 0.6884616968083866, "learning_rate": 0.000462, "loss": 7.5772, "step": 154 }, { "epoch": 0.00155, "grad_norm": 0.6360938051960316, "learning_rate": 0.000465, "loss": 7.5598, "step": 155 }, { "epoch": 0.00156, "grad_norm": 0.6937352973699618, "learning_rate": 0.000468, "loss": 7.5366, "step": 156 }, { "epoch": 0.00157, "grad_norm": 0.6334556877551312, "learning_rate": 0.000471, "loss": 7.53, "step": 157 }, { "epoch": 0.00158, "grad_norm": 0.9707170671679272, "learning_rate": 0.00047400000000000003, "loss": 7.5153, "step": 158 }, { "epoch": 0.00159, "grad_norm": 1.0240750882994218, "learning_rate": 0.000477, "loss": 7.5076, "step": 159 }, { "epoch": 0.0016, "grad_norm": 0.9322894026235434, "learning_rate": 0.00048, "loss": 7.4836, "step": 160 }, { "epoch": 0.00161, "grad_norm": 0.5279037898518898, "learning_rate": 0.00048300000000000003, "loss": 7.4527, "step": 161 }, { "epoch": 0.00162, "grad_norm": 0.687356662308957, "learning_rate": 0.00048600000000000005, "loss": 7.4601, "step": 162 }, { "epoch": 0.00163, "grad_norm": 0.5623951705594973, "learning_rate": 0.0004890000000000001, "loss": 7.4388, "step": 163 }, { "epoch": 0.00164, "grad_norm": 0.5581337114560441, "learning_rate": 0.000492, "loss": 7.4399, "step": 164 }, { "epoch": 0.00165, "grad_norm": 0.5516159301488641, "learning_rate": 0.000495, "loss": 7.4126, "step": 165 }, { "epoch": 0.00166, "grad_norm": 0.5242244578051735, "learning_rate": 0.0004980000000000001, "loss": 7.3876, "step": 166 }, { "epoch": 0.00167, "grad_norm": 0.45997959649003123, "learning_rate": 0.000501, "loss": 7.3779, "step": 167 }, { "epoch": 0.00168, "grad_norm": 0.5436289820614866, "learning_rate": 0.000504, "loss": 7.3569, "step": 168 }, { "epoch": 0.00169, "grad_norm": 0.4983067598465849, "learning_rate": 0.0005070000000000001, "loss": 7.3495, "step": 169 }, { "epoch": 0.0017, "grad_norm": 0.4402852485923817, "learning_rate": 0.00051, "loss": 7.3316, "step": 170 }, { "epoch": 0.00171, "grad_norm": 0.5221521396945228, "learning_rate": 0.000513, "loss": 7.3138, "step": 171 }, { "epoch": 0.00172, "grad_norm": 0.45544347662440743, "learning_rate": 0.000516, "loss": 7.3129, "step": 172 }, { "epoch": 0.00173, "grad_norm": 0.4745602833877857, "learning_rate": 0.0005189999999999999, "loss": 7.2961, "step": 173 }, { "epoch": 0.00174, "grad_norm": 0.5121580158942174, "learning_rate": 0.000522, "loss": 7.311, "step": 174 }, { "epoch": 0.00175, "grad_norm": 0.680505499537256, "learning_rate": 0.000525, "loss": 7.2769, "step": 175 }, { "epoch": 0.00176, "grad_norm": 0.7210959926983863, "learning_rate": 0.0005279999999999999, "loss": 7.2549, "step": 176 }, { "epoch": 0.00177, "grad_norm": 0.7510526045152774, "learning_rate": 0.000531, "loss": 7.2572, "step": 177 }, { "epoch": 0.00178, "grad_norm": 0.7343359791017195, "learning_rate": 0.000534, "loss": 7.2506, "step": 178 }, { "epoch": 0.00179, "grad_norm": 0.6185909230661502, "learning_rate": 0.000537, "loss": 7.2371, "step": 179 }, { "epoch": 0.0018, "grad_norm": 0.42887581745789505, "learning_rate": 0.00054, "loss": 7.2041, "step": 180 }, { "epoch": 0.00181, "grad_norm": 0.5359194273155663, "learning_rate": 0.000543, "loss": 7.1961, "step": 181 }, { "epoch": 0.00182, "grad_norm": 0.4369454993609359, "learning_rate": 0.000546, "loss": 7.188, "step": 182 }, { "epoch": 0.00183, "grad_norm": 0.41001210068633426, "learning_rate": 0.000549, "loss": 7.1769, "step": 183 }, { "epoch": 0.00184, "grad_norm": 0.4396906853721559, "learning_rate": 0.000552, "loss": 7.1646, "step": 184 }, { "epoch": 0.00185, "grad_norm": 0.4311329351383538, "learning_rate": 0.000555, "loss": 7.1403, "step": 185 }, { "epoch": 0.00186, "grad_norm": 0.44013221474943204, "learning_rate": 0.000558, "loss": 7.1495, "step": 186 }, { "epoch": 0.00187, "grad_norm": 0.42719641993731927, "learning_rate": 0.000561, "loss": 7.1244, "step": 187 }, { "epoch": 0.00188, "grad_norm": 0.39192004433941763, "learning_rate": 0.000564, "loss": 7.1022, "step": 188 }, { "epoch": 0.00189, "grad_norm": 0.5474017259570552, "learning_rate": 0.000567, "loss": 7.1002, "step": 189 }, { "epoch": 0.0019, "grad_norm": 0.5320208949420774, "learning_rate": 0.00057, "loss": 7.0824, "step": 190 }, { "epoch": 0.00191, "grad_norm": 0.7661733453917681, "learning_rate": 0.000573, "loss": 7.0764, "step": 191 }, { "epoch": 0.00192, "grad_norm": 0.9858385620454592, "learning_rate": 0.000576, "loss": 7.0775, "step": 192 }, { "epoch": 0.00193, "grad_norm": 0.9591489873604585, "learning_rate": 0.000579, "loss": 7.0725, "step": 193 }, { "epoch": 0.00194, "grad_norm": 0.8774582305545432, "learning_rate": 0.000582, "loss": 7.0715, "step": 194 }, { "epoch": 0.00195, "grad_norm": 0.9015156284206135, "learning_rate": 0.000585, "loss": 7.0487, "step": 195 }, { "epoch": 0.00196, "grad_norm": 0.8000421121121074, "learning_rate": 0.000588, "loss": 7.0339, "step": 196 }, { "epoch": 0.00197, "grad_norm": 0.8468645119231468, "learning_rate": 0.000591, "loss": 7.0329, "step": 197 }, { "epoch": 0.00198, "grad_norm": 0.6924161058762034, "learning_rate": 0.000594, "loss": 7.0197, "step": 198 }, { "epoch": 0.00199, "grad_norm": 0.5671884633245193, "learning_rate": 0.0005970000000000001, "loss": 6.9992, "step": 199 }, { "epoch": 0.002, "grad_norm": 0.5557515599131739, "learning_rate": 0.0006000000000000001, "loss": 6.9982, "step": 200 }, { "epoch": 0.00201, "grad_norm": 0.4619645912916116, "learning_rate": 0.000603, "loss": 6.9694, "step": 201 }, { "epoch": 0.00202, "grad_norm": 0.4929222182059183, "learning_rate": 0.0006060000000000001, "loss": 6.9601, "step": 202 }, { "epoch": 0.00203, "grad_norm": 0.37702087689736097, "learning_rate": 0.0006090000000000001, "loss": 6.9599, "step": 203 }, { "epoch": 0.00204, "grad_norm": 0.45518892473332057, "learning_rate": 0.000612, "loss": 6.9476, "step": 204 }, { "epoch": 0.00205, "grad_norm": 0.384809834745988, "learning_rate": 0.000615, "loss": 6.9322, "step": 205 }, { "epoch": 0.00206, "grad_norm": 0.38976835520829006, "learning_rate": 0.000618, "loss": 6.9255, "step": 206 }, { "epoch": 0.00207, "grad_norm": 0.4375749283114229, "learning_rate": 0.000621, "loss": 6.9166, "step": 207 }, { "epoch": 0.00208, "grad_norm": 0.6317927736115376, "learning_rate": 0.000624, "loss": 6.9157, "step": 208 }, { "epoch": 0.00209, "grad_norm": 0.7772617938427908, "learning_rate": 0.000627, "loss": 6.9007, "step": 209 }, { "epoch": 0.0021, "grad_norm": 1.2298132092555871, "learning_rate": 0.00063, "loss": 6.9015, "step": 210 }, { "epoch": 0.00211, "grad_norm": 0.7435502579532725, "learning_rate": 0.000633, "loss": 6.8883, "step": 211 }, { "epoch": 0.00212, "grad_norm": 0.3794792840193541, "learning_rate": 0.000636, "loss": 6.8693, "step": 212 }, { "epoch": 0.00213, "grad_norm": 0.688114698891817, "learning_rate": 0.000639, "loss": 6.8589, "step": 213 }, { "epoch": 0.00214, "grad_norm": 0.49868168831557785, "learning_rate": 0.000642, "loss": 6.8504, "step": 214 }, { "epoch": 0.00215, "grad_norm": 0.6292678113208914, "learning_rate": 0.000645, "loss": 6.8506, "step": 215 }, { "epoch": 0.00216, "grad_norm": 0.3673676300147008, "learning_rate": 0.000648, "loss": 6.8383, "step": 216 }, { "epoch": 0.00217, "grad_norm": 0.4511278951279821, "learning_rate": 0.000651, "loss": 6.8131, "step": 217 }, { "epoch": 0.00218, "grad_norm": 0.3783399316849399, "learning_rate": 0.000654, "loss": 6.8146, "step": 218 }, { "epoch": 0.00219, "grad_norm": 0.3651267362185199, "learning_rate": 0.000657, "loss": 6.7867, "step": 219 }, { "epoch": 0.0022, "grad_norm": 0.4708567379601776, "learning_rate": 0.00066, "loss": 6.8074, "step": 220 }, { "epoch": 0.00221, "grad_norm": 0.5218809374007617, "learning_rate": 0.0006630000000000001, "loss": 6.7837, "step": 221 }, { "epoch": 0.00222, "grad_norm": 0.7518789100021657, "learning_rate": 0.000666, "loss": 6.7774, "step": 222 }, { "epoch": 0.00223, "grad_norm": 0.9964198530393009, "learning_rate": 0.000669, "loss": 6.7824, "step": 223 }, { "epoch": 0.00224, "grad_norm": 0.807522133159797, "learning_rate": 0.0006720000000000001, "loss": 6.7748, "step": 224 }, { "epoch": 0.00225, "grad_norm": 0.6803785939854445, "learning_rate": 0.000675, "loss": 6.7563, "step": 225 }, { "epoch": 0.00226, "grad_norm": 1.0762485305388094, "learning_rate": 0.000678, "loss": 6.7681, "step": 226 }, { "epoch": 0.00227, "grad_norm": 0.9129777590140102, "learning_rate": 0.0006810000000000001, "loss": 6.734, "step": 227 }, { "epoch": 0.00228, "grad_norm": 1.299087365220233, "learning_rate": 0.000684, "loss": 6.7517, "step": 228 }, { "epoch": 0.00229, "grad_norm": 0.7392729796935101, "learning_rate": 0.000687, "loss": 6.7331, "step": 229 }, { "epoch": 0.0023, "grad_norm": 0.6158971034672628, "learning_rate": 0.0006900000000000001, "loss": 6.7089, "step": 230 }, { "epoch": 0.00231, "grad_norm": 0.7016695647497186, "learning_rate": 0.000693, "loss": 6.6956, "step": 231 }, { "epoch": 0.00232, "grad_norm": 0.6434185638703606, "learning_rate": 0.000696, "loss": 6.7082, "step": 232 }, { "epoch": 0.00233, "grad_norm": 0.6044879418578446, "learning_rate": 0.0006990000000000001, "loss": 6.6921, "step": 233 }, { "epoch": 0.00234, "grad_norm": 0.4480061456613071, "learning_rate": 0.000702, "loss": 6.679, "step": 234 }, { "epoch": 0.00235, "grad_norm": 0.4492075259819563, "learning_rate": 0.000705, "loss": 6.6661, "step": 235 }, { "epoch": 0.00236, "grad_norm": 0.3889271878969786, "learning_rate": 0.000708, "loss": 6.6458, "step": 236 }, { "epoch": 0.00237, "grad_norm": 0.41607485698419117, "learning_rate": 0.0007109999999999999, "loss": 6.6575, "step": 237 }, { "epoch": 0.00238, "grad_norm": 0.3515958046168432, "learning_rate": 0.000714, "loss": 6.6432, "step": 238 }, { "epoch": 0.00239, "grad_norm": 0.4049983983930021, "learning_rate": 0.000717, "loss": 6.6274, "step": 239 }, { "epoch": 0.0024, "grad_norm": 0.3247748587680522, "learning_rate": 0.0007199999999999999, "loss": 6.6414, "step": 240 }, { "epoch": 0.00241, "grad_norm": 0.36262136655648425, "learning_rate": 0.000723, "loss": 6.6267, "step": 241 }, { "epoch": 0.00242, "grad_norm": 0.3238608381517686, "learning_rate": 0.000726, "loss": 6.598, "step": 242 }, { "epoch": 0.00243, "grad_norm": 0.31681258130419926, "learning_rate": 0.000729, "loss": 6.5877, "step": 243 }, { "epoch": 0.00244, "grad_norm": 0.3616370322868285, "learning_rate": 0.000732, "loss": 6.5911, "step": 244 }, { "epoch": 0.00245, "grad_norm": 0.4634076592576489, "learning_rate": 0.000735, "loss": 6.5815, "step": 245 }, { "epoch": 0.00246, "grad_norm": 0.7486687333799987, "learning_rate": 0.000738, "loss": 6.5693, "step": 246 }, { "epoch": 0.00247, "grad_norm": 1.662590620725261, "learning_rate": 0.000741, "loss": 6.5942, "step": 247 }, { "epoch": 0.00248, "grad_norm": 0.7962452736060305, "learning_rate": 0.000744, "loss": 6.5799, "step": 248 }, { "epoch": 0.00249, "grad_norm": 1.091558045553116, "learning_rate": 0.000747, "loss": 6.5681, "step": 249 }, { "epoch": 0.0025, "grad_norm": 1.1566118279124307, "learning_rate": 0.00075, "loss": 6.5821, "step": 250 }, { "epoch": 0.00251, "grad_norm": 0.7784764288643516, "learning_rate": 0.000753, "loss": 6.5311, "step": 251 }, { "epoch": 0.00252, "grad_norm": 0.8327004278614876, "learning_rate": 0.000756, "loss": 6.5476, "step": 252 }, { "epoch": 0.00253, "grad_norm": 0.8184368732684698, "learning_rate": 0.000759, "loss": 6.5327, "step": 253 }, { "epoch": 0.00254, "grad_norm": 1.1486609061260633, "learning_rate": 0.000762, "loss": 6.541, "step": 254 }, { "epoch": 0.00255, "grad_norm": 0.8085196503687528, "learning_rate": 0.0007650000000000001, "loss": 6.533, "step": 255 }, { "epoch": 0.00256, "grad_norm": 0.6227788989369496, "learning_rate": 0.000768, "loss": 6.5039, "step": 256 }, { "epoch": 0.00257, "grad_norm": 0.7880922816831363, "learning_rate": 0.000771, "loss": 6.5163, "step": 257 }, { "epoch": 0.00258, "grad_norm": 0.8888953354311911, "learning_rate": 0.0007740000000000001, "loss": 6.497, "step": 258 }, { "epoch": 0.00259, "grad_norm": 0.6556759438280303, "learning_rate": 0.000777, "loss": 6.491, "step": 259 }, { "epoch": 0.0026, "grad_norm": 0.5556045743581063, "learning_rate": 0.0007800000000000001, "loss": 6.4736, "step": 260 }, { "epoch": 0.00261, "grad_norm": 0.5757853625150707, "learning_rate": 0.0007830000000000001, "loss": 6.4835, "step": 261 }, { "epoch": 0.00262, "grad_norm": 0.4702991355160636, "learning_rate": 0.000786, "loss": 6.4605, "step": 262 }, { "epoch": 0.00263, "grad_norm": 0.5236270398277312, "learning_rate": 0.0007890000000000001, "loss": 6.4668, "step": 263 }, { "epoch": 0.00264, "grad_norm": 0.57918798311576, "learning_rate": 0.0007920000000000001, "loss": 6.4494, "step": 264 }, { "epoch": 0.00265, "grad_norm": 0.48386144230733374, "learning_rate": 0.000795, "loss": 6.4394, "step": 265 }, { "epoch": 0.00266, "grad_norm": 0.44170660781498655, "learning_rate": 0.0007980000000000001, "loss": 6.4372, "step": 266 }, { "epoch": 0.00267, "grad_norm": 0.35002993788441544, "learning_rate": 0.0008010000000000001, "loss": 6.4273, "step": 267 }, { "epoch": 0.00268, "grad_norm": 0.49450621569184094, "learning_rate": 0.000804, "loss": 6.426, "step": 268 }, { "epoch": 0.00269, "grad_norm": 0.5917094821390496, "learning_rate": 0.0008070000000000001, "loss": 6.4104, "step": 269 }, { "epoch": 0.0027, "grad_norm": 0.8610573273382283, "learning_rate": 0.0008100000000000001, "loss": 6.4165, "step": 270 }, { "epoch": 0.00271, "grad_norm": 0.9687752227819599, "learning_rate": 0.000813, "loss": 6.4342, "step": 271 }, { "epoch": 0.00272, "grad_norm": 0.8252297813091672, "learning_rate": 0.0008160000000000001, "loss": 6.4109, "step": 272 }, { "epoch": 0.00273, "grad_norm": 1.0509340869742279, "learning_rate": 0.0008190000000000001, "loss": 6.4091, "step": 273 }, { "epoch": 0.00274, "grad_norm": 1.2492890299831212, "learning_rate": 0.000822, "loss": 6.3984, "step": 274 }, { "epoch": 0.00275, "grad_norm": 0.7449833128353471, "learning_rate": 0.0008250000000000001, "loss": 6.3937, "step": 275 }, { "epoch": 0.00276, "grad_norm": 0.650217944416595, "learning_rate": 0.0008280000000000001, "loss": 6.383, "step": 276 }, { "epoch": 0.00277, "grad_norm": 0.6897831829588578, "learning_rate": 0.0008310000000000001, "loss": 6.3791, "step": 277 }, { "epoch": 0.00278, "grad_norm": 0.6845751052836879, "learning_rate": 0.0008340000000000001, "loss": 6.3625, "step": 278 }, { "epoch": 0.00279, "grad_norm": 0.6895680336476154, "learning_rate": 0.0008370000000000001, "loss": 6.3607, "step": 279 }, { "epoch": 0.0028, "grad_norm": 0.7962805776833526, "learning_rate": 0.0008400000000000001, "loss": 6.3497, "step": 280 }, { "epoch": 0.00281, "grad_norm": 0.7281805890288613, "learning_rate": 0.0008430000000000001, "loss": 6.3358, "step": 281 }, { "epoch": 0.00282, "grad_norm": 0.6077351466654652, "learning_rate": 0.000846, "loss": 6.3343, "step": 282 }, { "epoch": 0.00283, "grad_norm": 0.7021452115418912, "learning_rate": 0.0008489999999999999, "loss": 6.3268, "step": 283 }, { "epoch": 0.00284, "grad_norm": 0.8067589645945545, "learning_rate": 0.0008519999999999999, "loss": 6.3217, "step": 284 }, { "epoch": 0.00285, "grad_norm": 0.7238661685202905, "learning_rate": 0.000855, "loss": 6.3229, "step": 285 }, { "epoch": 0.00286, "grad_norm": 0.6566110557704579, "learning_rate": 0.0008579999999999999, "loss": 6.3002, "step": 286 }, { "epoch": 0.00287, "grad_norm": 0.5448339024770075, "learning_rate": 0.000861, "loss": 6.3016, "step": 287 }, { "epoch": 0.00288, "grad_norm": 0.4516947530472597, "learning_rate": 0.000864, "loss": 6.279, "step": 288 }, { "epoch": 0.00289, "grad_norm": 0.5415480649305122, "learning_rate": 0.0008669999999999999, "loss": 6.2929, "step": 289 }, { "epoch": 0.0029, "grad_norm": 0.5812113854990302, "learning_rate": 0.00087, "loss": 6.2743, "step": 290 }, { "epoch": 0.00291, "grad_norm": 0.5620937871349196, "learning_rate": 0.000873, "loss": 6.2679, "step": 291 }, { "epoch": 0.00292, "grad_norm": 0.43362286284353735, "learning_rate": 0.0008759999999999999, "loss": 6.2618, "step": 292 }, { "epoch": 0.00293, "grad_norm": 0.563953479113573, "learning_rate": 0.000879, "loss": 6.2491, "step": 293 }, { "epoch": 0.00294, "grad_norm": 0.5784107789505917, "learning_rate": 0.000882, "loss": 6.2636, "step": 294 }, { "epoch": 0.00295, "grad_norm": 0.5410292555611181, "learning_rate": 0.0008849999999999999, "loss": 6.2293, "step": 295 }, { "epoch": 0.00296, "grad_norm": 0.5413778994680785, "learning_rate": 0.000888, "loss": 6.2428, "step": 296 }, { "epoch": 0.00297, "grad_norm": 0.7587824592105622, "learning_rate": 0.000891, "loss": 6.2186, "step": 297 }, { "epoch": 0.00298, "grad_norm": 1.2390223277612324, "learning_rate": 0.0008939999999999999, "loss": 6.2454, "step": 298 }, { "epoch": 0.00299, "grad_norm": 1.0051162436627608, "learning_rate": 0.000897, "loss": 6.2195, "step": 299 }, { "epoch": 0.003, "grad_norm": 0.9301924220055711, "learning_rate": 0.0009, "loss": 6.2223, "step": 300 }, { "epoch": 0.00301, "grad_norm": 0.9092669241988458, "learning_rate": 0.0009029999999999999, "loss": 6.2133, "step": 301 }, { "epoch": 0.00302, "grad_norm": 1.0280476917286576, "learning_rate": 0.000906, "loss": 6.2045, "step": 302 }, { "epoch": 0.00303, "grad_norm": 0.955686909103224, "learning_rate": 0.000909, "loss": 6.2159, "step": 303 }, { "epoch": 0.00304, "grad_norm": 0.924153033838841, "learning_rate": 0.000912, "loss": 6.2068, "step": 304 }, { "epoch": 0.00305, "grad_norm": 1.0129569357421315, "learning_rate": 0.000915, "loss": 6.2162, "step": 305 }, { "epoch": 0.00306, "grad_norm": 0.908196615580651, "learning_rate": 0.000918, "loss": 6.1982, "step": 306 }, { "epoch": 0.00307, "grad_norm": 0.9412115799039344, "learning_rate": 0.000921, "loss": 6.1922, "step": 307 }, { "epoch": 0.00308, "grad_norm": 1.0133210251014897, "learning_rate": 0.000924, "loss": 6.1801, "step": 308 }, { "epoch": 0.00309, "grad_norm": 0.7885110954474147, "learning_rate": 0.000927, "loss": 6.1916, "step": 309 }, { "epoch": 0.0031, "grad_norm": 0.8502930513548597, "learning_rate": 0.00093, "loss": 6.1767, "step": 310 }, { "epoch": 0.00311, "grad_norm": 0.8830498260903274, "learning_rate": 0.000933, "loss": 6.1706, "step": 311 }, { "epoch": 0.00312, "grad_norm": 0.7731682808518218, "learning_rate": 0.000936, "loss": 6.1483, "step": 312 }, { "epoch": 0.00313, "grad_norm": 0.5034693382195244, "learning_rate": 0.0009390000000000001, "loss": 6.1541, "step": 313 }, { "epoch": 0.00314, "grad_norm": 0.5944206771214186, "learning_rate": 0.000942, "loss": 6.1615, "step": 314 }, { "epoch": 0.00315, "grad_norm": 0.6198949865082991, "learning_rate": 0.000945, "loss": 6.1382, "step": 315 }, { "epoch": 0.00316, "grad_norm": 0.6322301787116348, "learning_rate": 0.0009480000000000001, "loss": 6.112, "step": 316 }, { "epoch": 0.00317, "grad_norm": 0.6034799354522895, "learning_rate": 0.000951, "loss": 6.1294, "step": 317 }, { "epoch": 0.00318, "grad_norm": 0.594812791501401, "learning_rate": 0.000954, "loss": 6.1243, "step": 318 }, { "epoch": 0.00319, "grad_norm": 0.5121880287121718, "learning_rate": 0.0009570000000000001, "loss": 6.1104, "step": 319 }, { "epoch": 0.0032, "grad_norm": 0.4383869814323231, "learning_rate": 0.00096, "loss": 6.1078, "step": 320 }, { "epoch": 0.00321, "grad_norm": 0.46253996577569373, "learning_rate": 0.000963, "loss": 6.0957, "step": 321 }, { "epoch": 0.00322, "grad_norm": 0.5330974668531825, "learning_rate": 0.0009660000000000001, "loss": 6.0852, "step": 322 }, { "epoch": 0.00323, "grad_norm": 0.4269664457046623, "learning_rate": 0.000969, "loss": 6.0805, "step": 323 }, { "epoch": 0.00324, "grad_norm": 0.4120363869720864, "learning_rate": 0.0009720000000000001, "loss": 6.0833, "step": 324 }, { "epoch": 0.00325, "grad_norm": 0.43852524771556284, "learning_rate": 0.0009750000000000001, "loss": 6.0494, "step": 325 }, { "epoch": 0.00326, "grad_norm": 0.5390182954459198, "learning_rate": 0.0009780000000000001, "loss": 6.0675, "step": 326 }, { "epoch": 0.00327, "grad_norm": 0.7655676469366879, "learning_rate": 0.000981, "loss": 6.0729, "step": 327 }, { "epoch": 0.00328, "grad_norm": 1.3095809996152918, "learning_rate": 0.000984, "loss": 6.0661, "step": 328 }, { "epoch": 0.00329, "grad_norm": 0.9398963728555475, "learning_rate": 0.000987, "loss": 6.0593, "step": 329 }, { "epoch": 0.0033, "grad_norm": 0.9687365266328093, "learning_rate": 0.00099, "loss": 6.0595, "step": 330 }, { "epoch": 0.00331, "grad_norm": 1.0572725295338647, "learning_rate": 0.0009930000000000002, "loss": 6.0709, "step": 331 }, { "epoch": 0.00332, "grad_norm": 0.7804253243852415, "learning_rate": 0.0009960000000000001, "loss": 6.0449, "step": 332 }, { "epoch": 0.00333, "grad_norm": 0.9477444786898163, "learning_rate": 0.000999, "loss": 6.0352, "step": 333 }, { "epoch": 0.00334, "grad_norm": 1.119636024931192, "learning_rate": 0.001002, "loss": 6.0509, "step": 334 }, { "epoch": 0.00335, "grad_norm": 1.0398096390896505, "learning_rate": 0.001005, "loss": 6.0495, "step": 335 }, { "epoch": 0.00336, "grad_norm": 0.8946811171098662, "learning_rate": 0.001008, "loss": 6.0348, "step": 336 }, { "epoch": 0.00337, "grad_norm": 1.0240126692017184, "learning_rate": 0.0010110000000000002, "loss": 6.0215, "step": 337 }, { "epoch": 0.00338, "grad_norm": 1.0245564805630587, "learning_rate": 0.0010140000000000001, "loss": 6.0414, "step": 338 }, { "epoch": 0.00339, "grad_norm": 0.9606863659347135, "learning_rate": 0.0010170000000000001, "loss": 6.0439, "step": 339 }, { "epoch": 0.0034, "grad_norm": 0.7549695201896788, "learning_rate": 0.00102, "loss": 6.0067, "step": 340 }, { "epoch": 0.00341, "grad_norm": 0.7981881225838676, "learning_rate": 0.001023, "loss": 5.9935, "step": 341 }, { "epoch": 0.00342, "grad_norm": 0.6476806075689499, "learning_rate": 0.001026, "loss": 5.9919, "step": 342 }, { "epoch": 0.00343, "grad_norm": 0.6180530813345404, "learning_rate": 0.0010290000000000002, "loss": 5.9943, "step": 343 }, { "epoch": 0.00344, "grad_norm": 0.5904235085071222, "learning_rate": 0.001032, "loss": 5.9824, "step": 344 }, { "epoch": 0.00345, "grad_norm": 0.5662246103658737, "learning_rate": 0.001035, "loss": 5.9736, "step": 345 }, { "epoch": 0.00346, "grad_norm": 0.5815647759401746, "learning_rate": 0.0010379999999999999, "loss": 5.9787, "step": 346 }, { "epoch": 0.00347, "grad_norm": 0.5922585456492798, "learning_rate": 0.001041, "loss": 5.9505, "step": 347 }, { "epoch": 0.00348, "grad_norm": 0.546770754957902, "learning_rate": 0.001044, "loss": 5.9566, "step": 348 }, { "epoch": 0.00349, "grad_norm": 0.5259270224752429, "learning_rate": 0.001047, "loss": 5.9388, "step": 349 }, { "epoch": 0.0035, "grad_norm": 0.5639453872626701, "learning_rate": 0.00105, "loss": 5.96, "step": 350 }, { "epoch": 0.00351, "grad_norm": 0.562716119705, "learning_rate": 0.001053, "loss": 5.9575, "step": 351 }, { "epoch": 0.00352, "grad_norm": 0.6276157353012132, "learning_rate": 0.0010559999999999999, "loss": 5.9357, "step": 352 }, { "epoch": 0.00353, "grad_norm": 0.6899105072430818, "learning_rate": 0.001059, "loss": 5.939, "step": 353 }, { "epoch": 0.00354, "grad_norm": 0.7414855002893123, "learning_rate": 0.001062, "loss": 5.9142, "step": 354 }, { "epoch": 0.00355, "grad_norm": 0.8275245559939801, "learning_rate": 0.001065, "loss": 5.932, "step": 355 }, { "epoch": 0.00356, "grad_norm": 0.8334699202442165, "learning_rate": 0.001068, "loss": 5.925, "step": 356 }, { "epoch": 0.00357, "grad_norm": 0.733463396370079, "learning_rate": 0.001071, "loss": 5.9147, "step": 357 }, { "epoch": 0.00358, "grad_norm": 0.5898600387972718, "learning_rate": 0.001074, "loss": 5.9022, "step": 358 }, { "epoch": 0.00359, "grad_norm": 0.53807120274405, "learning_rate": 0.001077, "loss": 5.8933, "step": 359 }, { "epoch": 0.0036, "grad_norm": 0.5868526923580266, "learning_rate": 0.00108, "loss": 5.8984, "step": 360 }, { "epoch": 0.00361, "grad_norm": 0.6561371025439784, "learning_rate": 0.001083, "loss": 5.8927, "step": 361 }, { "epoch": 0.00362, "grad_norm": 1.003392418604553, "learning_rate": 0.001086, "loss": 5.8918, "step": 362 }, { "epoch": 0.00363, "grad_norm": 1.2318504963643775, "learning_rate": 0.001089, "loss": 5.899, "step": 363 }, { "epoch": 0.00364, "grad_norm": 0.7073595215993886, "learning_rate": 0.001092, "loss": 5.8956, "step": 364 }, { "epoch": 0.00365, "grad_norm": 0.6834408409212124, "learning_rate": 0.001095, "loss": 5.8924, "step": 365 }, { "epoch": 0.00366, "grad_norm": 0.808409762735137, "learning_rate": 0.001098, "loss": 5.8732, "step": 366 }, { "epoch": 0.00367, "grad_norm": 1.150363353399194, "learning_rate": 0.001101, "loss": 5.8871, "step": 367 }, { "epoch": 0.00368, "grad_norm": 1.5759048240116487, "learning_rate": 0.001104, "loss": 5.9113, "step": 368 }, { "epoch": 0.00369, "grad_norm": 0.6706600035773841, "learning_rate": 0.001107, "loss": 5.8826, "step": 369 }, { "epoch": 0.0037, "grad_norm": 1.0165372867603926, "learning_rate": 0.00111, "loss": 5.8773, "step": 370 }, { "epoch": 0.00371, "grad_norm": 1.199271508025637, "learning_rate": 0.001113, "loss": 5.8595, "step": 371 }, { "epoch": 0.00372, "grad_norm": 0.8890339964058547, "learning_rate": 0.001116, "loss": 5.8693, "step": 372 }, { "epoch": 0.00373, "grad_norm": 1.0853395614111072, "learning_rate": 0.001119, "loss": 5.8674, "step": 373 }, { "epoch": 0.00374, "grad_norm": 0.9530307319001724, "learning_rate": 0.001122, "loss": 5.8703, "step": 374 }, { "epoch": 0.00375, "grad_norm": 0.934750801302772, "learning_rate": 0.0011250000000000001, "loss": 5.8719, "step": 375 }, { "epoch": 0.00376, "grad_norm": 0.7986123477309184, "learning_rate": 0.001128, "loss": 5.863, "step": 376 }, { "epoch": 0.00377, "grad_norm": 1.034328210427756, "learning_rate": 0.001131, "loss": 5.8644, "step": 377 }, { "epoch": 0.00378, "grad_norm": 1.1394107219722458, "learning_rate": 0.001134, "loss": 5.8702, "step": 378 }, { "epoch": 0.00379, "grad_norm": 0.6778105205682995, "learning_rate": 0.001137, "loss": 5.8386, "step": 379 }, { "epoch": 0.0038, "grad_norm": 0.7735792074169382, "learning_rate": 0.00114, "loss": 5.8619, "step": 380 }, { "epoch": 0.00381, "grad_norm": 0.882590402307869, "learning_rate": 0.0011430000000000001, "loss": 5.83, "step": 381 }, { "epoch": 0.00382, "grad_norm": 0.8860425089624719, "learning_rate": 0.001146, "loss": 5.8197, "step": 382 }, { "epoch": 0.00383, "grad_norm": 1.0396684860073488, "learning_rate": 0.001149, "loss": 5.836, "step": 383 }, { "epoch": 0.00384, "grad_norm": 0.9683520339794217, "learning_rate": 0.001152, "loss": 5.8386, "step": 384 }, { "epoch": 0.00385, "grad_norm": 0.8210443052214895, "learning_rate": 0.001155, "loss": 5.8215, "step": 385 }, { "epoch": 0.00386, "grad_norm": 1.030920073327714, "learning_rate": 0.001158, "loss": 5.8336, "step": 386 }, { "epoch": 0.00387, "grad_norm": 0.8032114385497527, "learning_rate": 0.0011610000000000001, "loss": 5.8025, "step": 387 }, { "epoch": 0.00388, "grad_norm": 0.6803620347459473, "learning_rate": 0.001164, "loss": 5.7978, "step": 388 }, { "epoch": 0.00389, "grad_norm": 0.6752304208768743, "learning_rate": 0.001167, "loss": 5.8027, "step": 389 }, { "epoch": 0.0039, "grad_norm": 0.6054825081153106, "learning_rate": 0.00117, "loss": 5.7828, "step": 390 }, { "epoch": 0.00391, "grad_norm": 0.5156470046541872, "learning_rate": 0.001173, "loss": 5.7863, "step": 391 }, { "epoch": 0.00392, "grad_norm": 0.49609211852516366, "learning_rate": 0.001176, "loss": 5.7945, "step": 392 }, { "epoch": 0.00393, "grad_norm": 0.42817727304572534, "learning_rate": 0.0011790000000000001, "loss": 5.7664, "step": 393 }, { "epoch": 0.00394, "grad_norm": 0.43894767278563757, "learning_rate": 0.001182, "loss": 5.7539, "step": 394 }, { "epoch": 0.00395, "grad_norm": 0.41850660912289844, "learning_rate": 0.001185, "loss": 5.7366, "step": 395 }, { "epoch": 0.00396, "grad_norm": 0.41745423249833347, "learning_rate": 0.001188, "loss": 5.7516, "step": 396 }, { "epoch": 0.00397, "grad_norm": 0.40474070631964676, "learning_rate": 0.001191, "loss": 5.7433, "step": 397 }, { "epoch": 0.00398, "grad_norm": 0.3677722018443306, "learning_rate": 0.0011940000000000002, "loss": 5.7479, "step": 398 }, { "epoch": 0.00399, "grad_norm": 0.3480206547108819, "learning_rate": 0.0011970000000000001, "loss": 5.7478, "step": 399 }, { "epoch": 0.004, "grad_norm": 0.3434828622202681, "learning_rate": 0.0012000000000000001, "loss": 5.7345, "step": 400 }, { "epoch": 0.00401, "grad_norm": 0.34918136204349326, "learning_rate": 0.001203, "loss": 5.7155, "step": 401 }, { "epoch": 0.00402, "grad_norm": 0.30554980038341767, "learning_rate": 0.001206, "loss": 5.7215, "step": 402 }, { "epoch": 0.00403, "grad_norm": 0.38840665384838735, "learning_rate": 0.001209, "loss": 5.7184, "step": 403 }, { "epoch": 0.00404, "grad_norm": 0.5409513056663879, "learning_rate": 0.0012120000000000002, "loss": 5.714, "step": 404 }, { "epoch": 0.00405, "grad_norm": 0.845020924848713, "learning_rate": 0.0012150000000000002, "loss": 5.7181, "step": 405 }, { "epoch": 0.00406, "grad_norm": 1.1911410915070972, "learning_rate": 0.0012180000000000001, "loss": 5.7205, "step": 406 }, { "epoch": 0.00407, "grad_norm": 0.6577283880630926, "learning_rate": 0.0012209999999999999, "loss": 5.6994, "step": 407 }, { "epoch": 0.00408, "grad_norm": 0.7475745975098248, "learning_rate": 0.001224, "loss": 5.7213, "step": 408 }, { "epoch": 0.00409, "grad_norm": 0.9872391126413178, "learning_rate": 0.001227, "loss": 5.7126, "step": 409 }, { "epoch": 0.0041, "grad_norm": 1.1359034721668335, "learning_rate": 0.00123, "loss": 5.7088, "step": 410 }, { "epoch": 0.00411, "grad_norm": 1.3596329145222696, "learning_rate": 0.001233, "loss": 5.7402, "step": 411 }, { "epoch": 0.00412, "grad_norm": 0.7538358474928969, "learning_rate": 0.001236, "loss": 5.7066, "step": 412 }, { "epoch": 0.00413, "grad_norm": 0.9465320539051596, "learning_rate": 0.0012389999999999999, "loss": 5.7197, "step": 413 }, { "epoch": 0.00414, "grad_norm": 0.9262933655624658, "learning_rate": 0.001242, "loss": 5.6978, "step": 414 }, { "epoch": 0.00415, "grad_norm": 1.1564175286146172, "learning_rate": 0.001245, "loss": 5.7105, "step": 415 }, { "epoch": 0.00416, "grad_norm": 1.1001247072345506, "learning_rate": 0.001248, "loss": 5.6929, "step": 416 }, { "epoch": 0.00417, "grad_norm": 1.0416153435685582, "learning_rate": 0.001251, "loss": 5.7199, "step": 417 }, { "epoch": 0.00418, "grad_norm": 1.0281555694116995, "learning_rate": 0.001254, "loss": 5.6999, "step": 418 }, { "epoch": 0.00419, "grad_norm": 1.1154617103247704, "learning_rate": 0.0012569999999999999, "loss": 5.6876, "step": 419 }, { "epoch": 0.0042, "grad_norm": 0.9999912825556322, "learning_rate": 0.00126, "loss": 5.7045, "step": 420 }, { "epoch": 0.00421, "grad_norm": 0.9729638313238949, "learning_rate": 0.001263, "loss": 5.6933, "step": 421 }, { "epoch": 0.00422, "grad_norm": 1.0297208241186608, "learning_rate": 0.001266, "loss": 5.6894, "step": 422 }, { "epoch": 0.00423, "grad_norm": 0.8381062456476874, "learning_rate": 0.001269, "loss": 5.6811, "step": 423 }, { "epoch": 0.00424, "grad_norm": 0.73825928337582, "learning_rate": 0.001272, "loss": 5.6773, "step": 424 }, { "epoch": 0.00425, "grad_norm": 0.8151693610653118, "learning_rate": 0.001275, "loss": 5.6815, "step": 425 }, { "epoch": 0.00426, "grad_norm": 0.9877723869544494, "learning_rate": 0.001278, "loss": 5.6881, "step": 426 }, { "epoch": 0.00427, "grad_norm": 0.8875063218226354, "learning_rate": 0.001281, "loss": 5.6676, "step": 427 }, { "epoch": 0.00428, "grad_norm": 0.8761493100474917, "learning_rate": 0.001284, "loss": 5.6437, "step": 428 }, { "epoch": 0.00429, "grad_norm": 1.109439755296158, "learning_rate": 0.001287, "loss": 5.668, "step": 429 }, { "epoch": 0.0043, "grad_norm": 0.8726361797071612, "learning_rate": 0.00129, "loss": 5.6611, "step": 430 }, { "epoch": 0.00431, "grad_norm": 0.5601358924900582, "learning_rate": 0.001293, "loss": 5.6342, "step": 431 }, { "epoch": 0.00432, "grad_norm": 0.5940988397687699, "learning_rate": 0.001296, "loss": 5.6448, "step": 432 }, { "epoch": 0.00433, "grad_norm": 0.480538337754196, "learning_rate": 0.001299, "loss": 5.6336, "step": 433 }, { "epoch": 0.00434, "grad_norm": 0.5069699406038057, "learning_rate": 0.001302, "loss": 5.6331, "step": 434 }, { "epoch": 0.00435, "grad_norm": 0.417220569666801, "learning_rate": 0.001305, "loss": 5.6227, "step": 435 }, { "epoch": 0.00436, "grad_norm": 0.46597565539240443, "learning_rate": 0.001308, "loss": 5.6039, "step": 436 }, { "epoch": 0.00437, "grad_norm": 0.37605303006482044, "learning_rate": 0.001311, "loss": 5.6161, "step": 437 }, { "epoch": 0.00438, "grad_norm": 0.3526339213940271, "learning_rate": 0.001314, "loss": 5.5977, "step": 438 }, { "epoch": 0.00439, "grad_norm": 0.3618369277094543, "learning_rate": 0.001317, "loss": 5.6246, "step": 439 }, { "epoch": 0.0044, "grad_norm": 0.3223158135938896, "learning_rate": 0.00132, "loss": 5.5938, "step": 440 }, { "epoch": 0.00441, "grad_norm": 0.3386640445759432, "learning_rate": 0.001323, "loss": 5.5905, "step": 441 }, { "epoch": 0.00442, "grad_norm": 0.3397360216396013, "learning_rate": 0.0013260000000000001, "loss": 5.5838, "step": 442 }, { "epoch": 0.00443, "grad_norm": 0.34532354892574607, "learning_rate": 0.001329, "loss": 5.5832, "step": 443 }, { "epoch": 0.00444, "grad_norm": 0.37928556611065656, "learning_rate": 0.001332, "loss": 5.5853, "step": 444 }, { "epoch": 0.00445, "grad_norm": 0.44947894711961484, "learning_rate": 0.001335, "loss": 5.5723, "step": 445 }, { "epoch": 0.00446, "grad_norm": 0.5239308309237933, "learning_rate": 0.001338, "loss": 5.5751, "step": 446 }, { "epoch": 0.00447, "grad_norm": 0.60889528761374, "learning_rate": 0.001341, "loss": 5.5777, "step": 447 }, { "epoch": 0.00448, "grad_norm": 0.6150556040535831, "learning_rate": 0.0013440000000000001, "loss": 5.561, "step": 448 }, { "epoch": 0.00449, "grad_norm": 0.5444850551876294, "learning_rate": 0.001347, "loss": 5.5623, "step": 449 }, { "epoch": 0.0045, "grad_norm": 0.6163688303714219, "learning_rate": 0.00135, "loss": 5.5617, "step": 450 }, { "epoch": 0.00451, "grad_norm": 0.7972728836658292, "learning_rate": 0.001353, "loss": 5.5614, "step": 451 }, { "epoch": 0.00452, "grad_norm": 0.7711764764543457, "learning_rate": 0.001356, "loss": 5.5454, "step": 452 }, { "epoch": 0.00453, "grad_norm": 0.7702356986189732, "learning_rate": 0.001359, "loss": 5.5379, "step": 453 }, { "epoch": 0.00454, "grad_norm": 1.0838160417982272, "learning_rate": 0.0013620000000000001, "loss": 5.5686, "step": 454 }, { "epoch": 0.00455, "grad_norm": 1.152655585803101, "learning_rate": 0.0013650000000000001, "loss": 5.5757, "step": 455 }, { "epoch": 0.00456, "grad_norm": 1.2776355160689266, "learning_rate": 0.001368, "loss": 5.5831, "step": 456 }, { "epoch": 0.00457, "grad_norm": 1.091292995937963, "learning_rate": 0.001371, "loss": 5.5727, "step": 457 }, { "epoch": 0.00458, "grad_norm": 0.8963405103823251, "learning_rate": 0.001374, "loss": 5.5735, "step": 458 }, { "epoch": 0.00459, "grad_norm": 1.0168648046101516, "learning_rate": 0.0013770000000000002, "loss": 5.5669, "step": 459 }, { "epoch": 0.0046, "grad_norm": 1.296131601782866, "learning_rate": 0.0013800000000000002, "loss": 5.5665, "step": 460 }, { "epoch": 0.00461, "grad_norm": 0.9634420565591739, "learning_rate": 0.0013830000000000001, "loss": 5.556, "step": 461 }, { "epoch": 0.00462, "grad_norm": 0.9383281224355017, "learning_rate": 0.001386, "loss": 5.5634, "step": 462 }, { "epoch": 0.00463, "grad_norm": 1.2569585597421309, "learning_rate": 0.001389, "loss": 5.5542, "step": 463 }, { "epoch": 0.00464, "grad_norm": 0.9874595595654581, "learning_rate": 0.001392, "loss": 5.5689, "step": 464 }, { "epoch": 0.00465, "grad_norm": 1.085595749506429, "learning_rate": 0.0013950000000000002, "loss": 5.5385, "step": 465 }, { "epoch": 0.00466, "grad_norm": 1.0673943770446899, "learning_rate": 0.0013980000000000002, "loss": 5.5603, "step": 466 }, { "epoch": 0.00467, "grad_norm": 0.8139501043376736, "learning_rate": 0.0014010000000000001, "loss": 5.5432, "step": 467 }, { "epoch": 0.00468, "grad_norm": 0.7494382793960519, "learning_rate": 0.001404, "loss": 5.5245, "step": 468 }, { "epoch": 0.00469, "grad_norm": 0.7634992086588068, "learning_rate": 0.001407, "loss": 5.5282, "step": 469 }, { "epoch": 0.0047, "grad_norm": 0.8018093758476836, "learning_rate": 0.00141, "loss": 5.5404, "step": 470 }, { "epoch": 0.00471, "grad_norm": 0.7418690809708749, "learning_rate": 0.001413, "loss": 5.5115, "step": 471 }, { "epoch": 0.00472, "grad_norm": 0.7355325431039438, "learning_rate": 0.001416, "loss": 5.5216, "step": 472 }, { "epoch": 0.00473, "grad_norm": 0.709026539269664, "learning_rate": 0.001419, "loss": 5.5305, "step": 473 }, { "epoch": 0.00474, "grad_norm": 0.5742329758009745, "learning_rate": 0.0014219999999999999, "loss": 5.5064, "step": 474 }, { "epoch": 0.00475, "grad_norm": 0.5859758403725885, "learning_rate": 0.001425, "loss": 5.4971, "step": 475 }, { "epoch": 0.00476, "grad_norm": 0.6365922795308678, "learning_rate": 0.001428, "loss": 5.5308, "step": 476 }, { "epoch": 0.00477, "grad_norm": 0.6539516343537074, "learning_rate": 0.001431, "loss": 5.4846, "step": 477 }, { "epoch": 0.00478, "grad_norm": 0.6446859909585969, "learning_rate": 0.001434, "loss": 5.5074, "step": 478 }, { "epoch": 0.00479, "grad_norm": 0.7791938725908187, "learning_rate": 0.001437, "loss": 5.4897, "step": 479 }, { "epoch": 0.0048, "grad_norm": 0.7608695568360718, "learning_rate": 0.0014399999999999999, "loss": 5.4855, "step": 480 }, { "epoch": 0.00481, "grad_norm": 0.5435552532069989, "learning_rate": 0.001443, "loss": 5.4813, "step": 481 }, { "epoch": 0.00482, "grad_norm": 0.4961185149512517, "learning_rate": 0.001446, "loss": 5.4538, "step": 482 }, { "epoch": 0.00483, "grad_norm": 0.5120902463904886, "learning_rate": 0.001449, "loss": 5.4636, "step": 483 }, { "epoch": 0.00484, "grad_norm": 0.418197369903841, "learning_rate": 0.001452, "loss": 5.464, "step": 484 }, { "epoch": 0.00485, "grad_norm": 0.36311406822078424, "learning_rate": 0.001455, "loss": 5.4671, "step": 485 }, { "epoch": 0.00486, "grad_norm": 0.3913777576995821, "learning_rate": 0.001458, "loss": 5.4393, "step": 486 }, { "epoch": 0.00487, "grad_norm": 0.36874474197662527, "learning_rate": 0.001461, "loss": 5.4517, "step": 487 }, { "epoch": 0.00488, "grad_norm": 0.41593519822402414, "learning_rate": 0.001464, "loss": 5.43, "step": 488 }, { "epoch": 0.00489, "grad_norm": 0.4051383375955623, "learning_rate": 0.001467, "loss": 5.4204, "step": 489 }, { "epoch": 0.0049, "grad_norm": 0.4948319932753325, "learning_rate": 0.00147, "loss": 5.434, "step": 490 }, { "epoch": 0.00491, "grad_norm": 0.6887805434617323, "learning_rate": 0.001473, "loss": 5.4357, "step": 491 }, { "epoch": 0.00492, "grad_norm": 0.912047432270828, "learning_rate": 0.001476, "loss": 5.4432, "step": 492 }, { "epoch": 0.00493, "grad_norm": 0.8950009277905591, "learning_rate": 0.001479, "loss": 5.4415, "step": 493 }, { "epoch": 0.00494, "grad_norm": 0.807683799932231, "learning_rate": 0.001482, "loss": 5.4427, "step": 494 }, { "epoch": 0.00495, "grad_norm": 0.976722169908224, "learning_rate": 0.001485, "loss": 5.4601, "step": 495 }, { "epoch": 0.00496, "grad_norm": 0.7224780529872387, "learning_rate": 0.001488, "loss": 5.4314, "step": 496 }, { "epoch": 0.00497, "grad_norm": 0.7139613919522917, "learning_rate": 0.001491, "loss": 5.4172, "step": 497 }, { "epoch": 0.00498, "grad_norm": 0.7556637282468179, "learning_rate": 0.001494, "loss": 5.4443, "step": 498 }, { "epoch": 0.00499, "grad_norm": 0.8519321120342865, "learning_rate": 0.001497, "loss": 5.4223, "step": 499 }, { "epoch": 0.005, "grad_norm": 0.9868888032280079, "learning_rate": 0.0015, "loss": 5.4308, "step": 500 }, { "epoch": 0.00501, "grad_norm": 1.2028724714017198, "learning_rate": 0.001503, "loss": 5.4458, "step": 501 }, { "epoch": 0.00502, "grad_norm": 0.9548534640519003, "learning_rate": 0.001506, "loss": 5.4405, "step": 502 }, { "epoch": 0.00503, "grad_norm": 0.9423651398338494, "learning_rate": 0.0015090000000000001, "loss": 5.4484, "step": 503 }, { "epoch": 0.00504, "grad_norm": 0.9672711526274779, "learning_rate": 0.001512, "loss": 5.4328, "step": 504 }, { "epoch": 0.00505, "grad_norm": 0.9474889600476256, "learning_rate": 0.001515, "loss": 5.4283, "step": 505 }, { "epoch": 0.00506, "grad_norm": 1.3039451405080307, "learning_rate": 0.001518, "loss": 5.4432, "step": 506 }, { "epoch": 0.00507, "grad_norm": 1.2269782118632737, "learning_rate": 0.001521, "loss": 5.4452, "step": 507 }, { "epoch": 0.00508, "grad_norm": 0.8597426683067237, "learning_rate": 0.001524, "loss": 5.4268, "step": 508 }, { "epoch": 0.00509, "grad_norm": 0.9388657090373522, "learning_rate": 0.0015270000000000001, "loss": 5.4178, "step": 509 }, { "epoch": 0.0051, "grad_norm": 0.9594427813189665, "learning_rate": 0.0015300000000000001, "loss": 5.4356, "step": 510 }, { "epoch": 0.00511, "grad_norm": 1.04563577032056, "learning_rate": 0.001533, "loss": 5.4212, "step": 511 }, { "epoch": 0.00512, "grad_norm": 0.733703407645156, "learning_rate": 0.001536, "loss": 5.4003, "step": 512 }, { "epoch": 0.00513, "grad_norm": 0.8415210942026606, "learning_rate": 0.001539, "loss": 5.423, "step": 513 }, { "epoch": 0.00514, "grad_norm": 0.8791751992621939, "learning_rate": 0.001542, "loss": 5.4064, "step": 514 }, { "epoch": 0.00515, "grad_norm": 0.8161499995578689, "learning_rate": 0.0015450000000000001, "loss": 5.4094, "step": 515 }, { "epoch": 0.00516, "grad_norm": 0.878383695319614, "learning_rate": 0.0015480000000000001, "loss": 5.4087, "step": 516 }, { "epoch": 0.00517, "grad_norm": 0.9768725869756134, "learning_rate": 0.001551, "loss": 5.4055, "step": 517 }, { "epoch": 0.00518, "grad_norm": 0.8865017723772849, "learning_rate": 0.001554, "loss": 5.3907, "step": 518 }, { "epoch": 0.00519, "grad_norm": 0.8308797688973832, "learning_rate": 0.001557, "loss": 5.3905, "step": 519 }, { "epoch": 0.0052, "grad_norm": 0.6978413162257922, "learning_rate": 0.0015600000000000002, "loss": 5.3938, "step": 520 }, { "epoch": 0.00521, "grad_norm": 0.6562689530690187, "learning_rate": 0.0015630000000000002, "loss": 5.3676, "step": 521 }, { "epoch": 0.00522, "grad_norm": 0.5577523148431155, "learning_rate": 0.0015660000000000001, "loss": 5.3673, "step": 522 }, { "epoch": 0.00523, "grad_norm": 0.5298728018270966, "learning_rate": 0.001569, "loss": 5.3784, "step": 523 }, { "epoch": 0.00524, "grad_norm": 0.44216623389663734, "learning_rate": 0.001572, "loss": 5.3811, "step": 524 }, { "epoch": 0.00525, "grad_norm": 0.3702182111689363, "learning_rate": 0.001575, "loss": 5.369, "step": 525 }, { "epoch": 0.00526, "grad_norm": 0.40289552198632295, "learning_rate": 0.0015780000000000002, "loss": 5.3444, "step": 526 }, { "epoch": 0.00527, "grad_norm": 0.38284579814689895, "learning_rate": 0.0015810000000000002, "loss": 5.3551, "step": 527 }, { "epoch": 0.00528, "grad_norm": 0.33950473031510653, "learning_rate": 0.0015840000000000001, "loss": 5.3339, "step": 528 }, { "epoch": 0.00529, "grad_norm": 0.3777758983585419, "learning_rate": 0.001587, "loss": 5.343, "step": 529 }, { "epoch": 0.0053, "grad_norm": 0.4257451161382566, "learning_rate": 0.00159, "loss": 5.3483, "step": 530 }, { "epoch": 0.00531, "grad_norm": 0.5472045550610978, "learning_rate": 0.001593, "loss": 5.3387, "step": 531 }, { "epoch": 0.00532, "grad_norm": 0.696856419507981, "learning_rate": 0.0015960000000000002, "loss": 5.3455, "step": 532 }, { "epoch": 0.00533, "grad_norm": 0.7975941430607876, "learning_rate": 0.0015990000000000002, "loss": 5.3447, "step": 533 }, { "epoch": 0.00534, "grad_norm": 0.6576622452461942, "learning_rate": 0.0016020000000000001, "loss": 5.3339, "step": 534 }, { "epoch": 0.00535, "grad_norm": 0.5879483820814444, "learning_rate": 0.001605, "loss": 5.3347, "step": 535 }, { "epoch": 0.00536, "grad_norm": 0.8212636469840171, "learning_rate": 0.001608, "loss": 5.3454, "step": 536 }, { "epoch": 0.00537, "grad_norm": 0.7740232031924225, "learning_rate": 0.0016110000000000002, "loss": 5.3418, "step": 537 }, { "epoch": 0.00538, "grad_norm": 0.683098129060214, "learning_rate": 0.0016140000000000002, "loss": 5.3143, "step": 538 }, { "epoch": 0.00539, "grad_norm": 0.9170551975741953, "learning_rate": 0.0016170000000000002, "loss": 5.3245, "step": 539 }, { "epoch": 0.0054, "grad_norm": 0.765080131514484, "learning_rate": 0.0016200000000000001, "loss": 5.3262, "step": 540 }, { "epoch": 0.00541, "grad_norm": 0.658457026305436, "learning_rate": 0.001623, "loss": 5.3137, "step": 541 }, { "epoch": 0.00542, "grad_norm": 0.601942869875084, "learning_rate": 0.001626, "loss": 5.3315, "step": 542 }, { "epoch": 0.00543, "grad_norm": 0.6751097730454854, "learning_rate": 0.0016290000000000002, "loss": 5.2998, "step": 543 }, { "epoch": 0.00544, "grad_norm": 0.6943293389301006, "learning_rate": 0.0016320000000000002, "loss": 5.3191, "step": 544 }, { "epoch": 0.00545, "grad_norm": 0.757591621302123, "learning_rate": 0.0016350000000000002, "loss": 5.3293, "step": 545 }, { "epoch": 0.00546, "grad_norm": 0.825685624372282, "learning_rate": 0.0016380000000000001, "loss": 5.3061, "step": 546 }, { "epoch": 0.00547, "grad_norm": 0.8411824339962438, "learning_rate": 0.001641, "loss": 5.3051, "step": 547 }, { "epoch": 0.00548, "grad_norm": 0.8225125547688507, "learning_rate": 0.001644, "loss": 5.303, "step": 548 }, { "epoch": 0.00549, "grad_norm": 0.7692066087665821, "learning_rate": 0.0016470000000000002, "loss": 5.3016, "step": 549 }, { "epoch": 0.0055, "grad_norm": 0.7541680263658305, "learning_rate": 0.0016500000000000002, "loss": 5.3099, "step": 550 }, { "epoch": 0.00551, "grad_norm": 0.8632990593818363, "learning_rate": 0.0016530000000000002, "loss": 5.3123, "step": 551 }, { "epoch": 0.00552, "grad_norm": 0.9083583396116485, "learning_rate": 0.0016560000000000001, "loss": 5.3063, "step": 552 }, { "epoch": 0.00553, "grad_norm": 0.9125681148017897, "learning_rate": 0.001659, "loss": 5.304, "step": 553 }, { "epoch": 0.00554, "grad_norm": 0.9360850923631763, "learning_rate": 0.0016620000000000003, "loss": 5.3021, "step": 554 }, { "epoch": 0.00555, "grad_norm": 0.9519012966124738, "learning_rate": 0.0016650000000000002, "loss": 5.3154, "step": 555 }, { "epoch": 0.00556, "grad_norm": 0.9297335237671114, "learning_rate": 0.0016680000000000002, "loss": 5.2962, "step": 556 }, { "epoch": 0.00557, "grad_norm": 1.1187109821510643, "learning_rate": 0.0016710000000000002, "loss": 5.2935, "step": 557 }, { "epoch": 0.00558, "grad_norm": 0.9868599839032521, "learning_rate": 0.0016740000000000001, "loss": 5.3219, "step": 558 }, { "epoch": 0.00559, "grad_norm": 0.8786610310513777, "learning_rate": 0.001677, "loss": 5.2938, "step": 559 }, { "epoch": 0.0056, "grad_norm": 0.9118163270219521, "learning_rate": 0.0016800000000000003, "loss": 5.2767, "step": 560 }, { "epoch": 0.00561, "grad_norm": 0.8649386991384733, "learning_rate": 0.0016830000000000003, "loss": 5.3004, "step": 561 }, { "epoch": 0.00562, "grad_norm": 0.5982058914442256, "learning_rate": 0.0016860000000000002, "loss": 5.279, "step": 562 }, { "epoch": 0.00563, "grad_norm": 0.5830709316445766, "learning_rate": 0.001689, "loss": 5.2821, "step": 563 }, { "epoch": 0.00564, "grad_norm": 0.5564672250442253, "learning_rate": 0.001692, "loss": 5.2582, "step": 564 }, { "epoch": 0.00565, "grad_norm": 0.5903966881939692, "learning_rate": 0.001695, "loss": 5.2619, "step": 565 }, { "epoch": 0.00566, "grad_norm": 0.5777761648359326, "learning_rate": 0.0016979999999999999, "loss": 5.2552, "step": 566 }, { "epoch": 0.00567, "grad_norm": 0.6069030603134064, "learning_rate": 0.0017009999999999998, "loss": 5.2491, "step": 567 }, { "epoch": 0.00568, "grad_norm": 0.5693132087018719, "learning_rate": 0.0017039999999999998, "loss": 5.2604, "step": 568 }, { "epoch": 0.00569, "grad_norm": 0.47662406778838745, "learning_rate": 0.001707, "loss": 5.2359, "step": 569 }, { "epoch": 0.0057, "grad_norm": 0.49231021705037487, "learning_rate": 0.00171, "loss": 5.253, "step": 570 }, { "epoch": 0.00571, "grad_norm": 0.4167352661720621, "learning_rate": 0.001713, "loss": 5.2491, "step": 571 }, { "epoch": 0.00572, "grad_norm": 0.39011854138215074, "learning_rate": 0.0017159999999999999, "loss": 5.2258, "step": 572 }, { "epoch": 0.00573, "grad_norm": 0.45971389455263184, "learning_rate": 0.0017189999999999998, "loss": 5.2313, "step": 573 }, { "epoch": 0.00574, "grad_norm": 0.4643694238461635, "learning_rate": 0.001722, "loss": 5.2399, "step": 574 }, { "epoch": 0.00575, "grad_norm": 0.4439840100405838, "learning_rate": 0.001725, "loss": 5.2168, "step": 575 }, { "epoch": 0.00576, "grad_norm": 0.4794795740566699, "learning_rate": 0.001728, "loss": 5.2109, "step": 576 }, { "epoch": 0.00577, "grad_norm": 0.5445912068882687, "learning_rate": 0.001731, "loss": 5.2354, "step": 577 }, { "epoch": 0.00578, "grad_norm": 0.5881619532039025, "learning_rate": 0.0017339999999999999, "loss": 5.2156, "step": 578 }, { "epoch": 0.00579, "grad_norm": 0.6688964155004951, "learning_rate": 0.0017369999999999998, "loss": 5.2064, "step": 579 }, { "epoch": 0.0058, "grad_norm": 0.6446644513052245, "learning_rate": 0.00174, "loss": 5.2223, "step": 580 }, { "epoch": 0.00581, "grad_norm": 0.7385299343409043, "learning_rate": 0.001743, "loss": 5.2222, "step": 581 }, { "epoch": 0.00582, "grad_norm": 1.0486135149054512, "learning_rate": 0.001746, "loss": 5.2114, "step": 582 }, { "epoch": 0.00583, "grad_norm": 0.9184260585056472, "learning_rate": 0.001749, "loss": 5.2374, "step": 583 }, { "epoch": 0.00584, "grad_norm": 0.7724931135788974, "learning_rate": 0.0017519999999999999, "loss": 5.2235, "step": 584 }, { "epoch": 0.00585, "grad_norm": 0.903942587317279, "learning_rate": 0.0017549999999999998, "loss": 5.2218, "step": 585 }, { "epoch": 0.00586, "grad_norm": 0.8496888678331875, "learning_rate": 0.001758, "loss": 5.2272, "step": 586 }, { "epoch": 0.00587, "grad_norm": 0.8580070219006531, "learning_rate": 0.001761, "loss": 5.2094, "step": 587 }, { "epoch": 0.00588, "grad_norm": 1.0100984711915582, "learning_rate": 0.001764, "loss": 5.233, "step": 588 }, { "epoch": 0.00589, "grad_norm": 0.9311738464832717, "learning_rate": 0.001767, "loss": 5.2289, "step": 589 }, { "epoch": 0.0059, "grad_norm": 0.9363758859588256, "learning_rate": 0.0017699999999999999, "loss": 5.2155, "step": 590 }, { "epoch": 0.00591, "grad_norm": 0.9695025163620248, "learning_rate": 0.001773, "loss": 5.2165, "step": 591 }, { "epoch": 0.00592, "grad_norm": 0.9805216711841677, "learning_rate": 0.001776, "loss": 5.2417, "step": 592 }, { "epoch": 0.00593, "grad_norm": 0.9827241162755458, "learning_rate": 0.001779, "loss": 5.2392, "step": 593 }, { "epoch": 0.00594, "grad_norm": 1.1445433178010636, "learning_rate": 0.001782, "loss": 5.2522, "step": 594 }, { "epoch": 0.00595, "grad_norm": 0.950699335886284, "learning_rate": 0.001785, "loss": 5.2351, "step": 595 }, { "epoch": 0.00596, "grad_norm": 0.8373647963925929, "learning_rate": 0.0017879999999999999, "loss": 5.2269, "step": 596 }, { "epoch": 0.00597, "grad_norm": 0.9799263768822638, "learning_rate": 0.001791, "loss": 5.2118, "step": 597 }, { "epoch": 0.00598, "grad_norm": 0.9413379648187816, "learning_rate": 0.001794, "loss": 5.2215, "step": 598 }, { "epoch": 0.00599, "grad_norm": 0.8392983563516706, "learning_rate": 0.001797, "loss": 5.2191, "step": 599 }, { "epoch": 0.006, "grad_norm": 0.8414286288934597, "learning_rate": 0.0018, "loss": 5.2134, "step": 600 }, { "epoch": 0.00601, "grad_norm": 0.8993201270126511, "learning_rate": 0.001803, "loss": 5.2089, "step": 601 }, { "epoch": 0.00602, "grad_norm": 0.8678008919316418, "learning_rate": 0.0018059999999999999, "loss": 5.1957, "step": 602 }, { "epoch": 0.00603, "grad_norm": 0.6967382011990112, "learning_rate": 0.001809, "loss": 5.2122, "step": 603 }, { "epoch": 0.00604, "grad_norm": 0.5312401482691863, "learning_rate": 0.001812, "loss": 5.1788, "step": 604 }, { "epoch": 0.00605, "grad_norm": 0.5795052729398412, "learning_rate": 0.001815, "loss": 5.1915, "step": 605 }, { "epoch": 0.00606, "grad_norm": 0.6345039250131549, "learning_rate": 0.001818, "loss": 5.1894, "step": 606 }, { "epoch": 0.00607, "grad_norm": 0.6716049737606092, "learning_rate": 0.001821, "loss": 5.1916, "step": 607 }, { "epoch": 0.00608, "grad_norm": 0.631863512626499, "learning_rate": 0.001824, "loss": 5.1762, "step": 608 }, { "epoch": 0.00609, "grad_norm": 0.5345713642167306, "learning_rate": 0.001827, "loss": 5.177, "step": 609 }, { "epoch": 0.0061, "grad_norm": 0.455626202663485, "learning_rate": 0.00183, "loss": 5.1434, "step": 610 }, { "epoch": 0.00611, "grad_norm": 0.46197362206927406, "learning_rate": 0.001833, "loss": 5.1489, "step": 611 }, { "epoch": 0.00612, "grad_norm": 0.3943653846553885, "learning_rate": 0.001836, "loss": 5.1488, "step": 612 }, { "epoch": 0.00613, "grad_norm": 0.4053887102586243, "learning_rate": 0.001839, "loss": 5.1409, "step": 613 }, { "epoch": 0.00614, "grad_norm": 0.44363743610311057, "learning_rate": 0.001842, "loss": 5.1611, "step": 614 }, { "epoch": 0.00615, "grad_norm": 0.40025757691173514, "learning_rate": 0.001845, "loss": 5.145, "step": 615 }, { "epoch": 0.00616, "grad_norm": 0.4067321452673224, "learning_rate": 0.001848, "loss": 5.1299, "step": 616 }, { "epoch": 0.00617, "grad_norm": 0.42403873536126996, "learning_rate": 0.001851, "loss": 5.1397, "step": 617 }, { "epoch": 0.00618, "grad_norm": 0.44192084147381183, "learning_rate": 0.001854, "loss": 5.1265, "step": 618 }, { "epoch": 0.00619, "grad_norm": 0.5104655865853382, "learning_rate": 0.001857, "loss": 5.1299, "step": 619 }, { "epoch": 0.0062, "grad_norm": 0.6226145292933671, "learning_rate": 0.00186, "loss": 5.118, "step": 620 }, { "epoch": 0.00621, "grad_norm": 0.745237667035521, "learning_rate": 0.001863, "loss": 5.1237, "step": 621 }, { "epoch": 0.00622, "grad_norm": 0.7986865810762169, "learning_rate": 0.001866, "loss": 5.1267, "step": 622 }, { "epoch": 0.00623, "grad_norm": 0.7723208155309241, "learning_rate": 0.001869, "loss": 5.1084, "step": 623 }, { "epoch": 0.00624, "grad_norm": 0.8171878766946273, "learning_rate": 0.001872, "loss": 5.1246, "step": 624 }, { "epoch": 0.00625, "grad_norm": 0.7607424856276187, "learning_rate": 0.001875, "loss": 5.1359, "step": 625 }, { "epoch": 0.00626, "grad_norm": 0.7005729458198662, "learning_rate": 0.0018780000000000001, "loss": 5.1104, "step": 626 }, { "epoch": 0.00627, "grad_norm": 0.6735260479679158, "learning_rate": 0.001881, "loss": 5.1015, "step": 627 }, { "epoch": 0.00628, "grad_norm": 0.780486998163919, "learning_rate": 0.001884, "loss": 5.1349, "step": 628 }, { "epoch": 0.00629, "grad_norm": 0.8746830311438225, "learning_rate": 0.001887, "loss": 5.1097, "step": 629 }, { "epoch": 0.0063, "grad_norm": 0.9536711950620466, "learning_rate": 0.00189, "loss": 5.1137, "step": 630 }, { "epoch": 0.00631, "grad_norm": 0.8628106812141149, "learning_rate": 0.0018930000000000002, "loss": 5.1035, "step": 631 }, { "epoch": 0.00632, "grad_norm": 0.9320890333045916, "learning_rate": 0.0018960000000000001, "loss": 5.1281, "step": 632 }, { "epoch": 0.00633, "grad_norm": 1.0593968691082751, "learning_rate": 0.001899, "loss": 5.1321, "step": 633 }, { "epoch": 0.00634, "grad_norm": 0.943443350801409, "learning_rate": 0.001902, "loss": 5.1182, "step": 634 }, { "epoch": 0.00635, "grad_norm": 0.9628484504873114, "learning_rate": 0.001905, "loss": 5.1089, "step": 635 }, { "epoch": 0.00636, "grad_norm": 1.1043383962750646, "learning_rate": 0.001908, "loss": 5.1289, "step": 636 }, { "epoch": 0.00637, "grad_norm": 0.8987493866500654, "learning_rate": 0.0019110000000000002, "loss": 5.1351, "step": 637 }, { "epoch": 0.00638, "grad_norm": 0.9251804377428581, "learning_rate": 0.0019140000000000001, "loss": 5.1288, "step": 638 }, { "epoch": 0.00639, "grad_norm": 0.854418425044198, "learning_rate": 0.001917, "loss": 5.0998, "step": 639 }, { "epoch": 0.0064, "grad_norm": 0.9324816679284724, "learning_rate": 0.00192, "loss": 5.1038, "step": 640 }, { "epoch": 0.00641, "grad_norm": 0.9892889234371413, "learning_rate": 0.001923, "loss": 5.1163, "step": 641 }, { "epoch": 0.00642, "grad_norm": 1.0346602459121752, "learning_rate": 0.001926, "loss": 5.106, "step": 642 }, { "epoch": 0.00643, "grad_norm": 0.8661994645957561, "learning_rate": 0.0019290000000000002, "loss": 5.117, "step": 643 }, { "epoch": 0.00644, "grad_norm": 0.8724056100423225, "learning_rate": 0.0019320000000000001, "loss": 5.0889, "step": 644 }, { "epoch": 0.00645, "grad_norm": 0.8584186184200229, "learning_rate": 0.001935, "loss": 5.1004, "step": 645 }, { "epoch": 0.00646, "grad_norm": 0.7360558672224548, "learning_rate": 0.001938, "loss": 5.0955, "step": 646 }, { "epoch": 0.00647, "grad_norm": 0.7977702647925389, "learning_rate": 0.001941, "loss": 5.1058, "step": 647 }, { "epoch": 0.00648, "grad_norm": 0.7872116543506851, "learning_rate": 0.0019440000000000002, "loss": 5.0908, "step": 648 }, { "epoch": 0.00649, "grad_norm": 0.7104658813349117, "learning_rate": 0.0019470000000000002, "loss": 5.0718, "step": 649 }, { "epoch": 0.0065, "grad_norm": 0.7453763255239747, "learning_rate": 0.0019500000000000001, "loss": 5.0953, "step": 650 }, { "epoch": 0.00651, "grad_norm": 0.7781624388594444, "learning_rate": 0.001953, "loss": 5.0758, "step": 651 }, { "epoch": 0.00652, "grad_norm": 0.7616046275009601, "learning_rate": 0.0019560000000000003, "loss": 5.0661, "step": 652 }, { "epoch": 0.00653, "grad_norm": 0.5945469625366651, "learning_rate": 0.0019590000000000002, "loss": 5.0539, "step": 653 }, { "epoch": 0.00654, "grad_norm": 0.6024408595794577, "learning_rate": 0.001962, "loss": 5.0374, "step": 654 }, { "epoch": 0.00655, "grad_norm": 0.5905307565923603, "learning_rate": 0.001965, "loss": 5.048, "step": 655 }, { "epoch": 0.00656, "grad_norm": 0.5236322372626927, "learning_rate": 0.001968, "loss": 5.04, "step": 656 }, { "epoch": 0.00657, "grad_norm": 0.5283416618835216, "learning_rate": 0.001971, "loss": 5.0223, "step": 657 }, { "epoch": 0.00658, "grad_norm": 0.5563146586062104, "learning_rate": 0.001974, "loss": 5.0415, "step": 658 }, { "epoch": 0.00659, "grad_norm": 0.6297873363395704, "learning_rate": 0.001977, "loss": 5.0241, "step": 659 }, { "epoch": 0.0066, "grad_norm": 0.5780538180580159, "learning_rate": 0.00198, "loss": 5.0197, "step": 660 }, { "epoch": 0.00661, "grad_norm": 0.5505266007864265, "learning_rate": 0.001983, "loss": 4.9938, "step": 661 }, { "epoch": 0.00662, "grad_norm": 0.6077995116545319, "learning_rate": 0.0019860000000000004, "loss": 4.9946, "step": 662 }, { "epoch": 0.00663, "grad_norm": 0.693544741809416, "learning_rate": 0.0019890000000000003, "loss": 5.01, "step": 663 }, { "epoch": 0.00664, "grad_norm": 0.801492715102265, "learning_rate": 0.0019920000000000003, "loss": 5.0017, "step": 664 }, { "epoch": 0.00665, "grad_norm": 0.8803866066519176, "learning_rate": 0.0019950000000000002, "loss": 4.9845, "step": 665 }, { "epoch": 0.00666, "grad_norm": 0.9133314823416234, "learning_rate": 0.001998, "loss": 5.0048, "step": 666 }, { "epoch": 0.00667, "grad_norm": 0.9006055773427947, "learning_rate": 0.002001, "loss": 4.9967, "step": 667 }, { "epoch": 0.00668, "grad_norm": 0.8268536663020751, "learning_rate": 0.002004, "loss": 5.0164, "step": 668 }, { "epoch": 0.00669, "grad_norm": 1.1034218430158187, "learning_rate": 0.002007, "loss": 5.0358, "step": 669 }, { "epoch": 0.0067, "grad_norm": 1.0710112933622913, "learning_rate": 0.00201, "loss": 5.0286, "step": 670 }, { "epoch": 0.00671, "grad_norm": 1.0384170566197124, "learning_rate": 0.002013, "loss": 5.0085, "step": 671 }, { "epoch": 0.00672, "grad_norm": 0.9247071512184438, "learning_rate": 0.002016, "loss": 4.9973, "step": 672 }, { "epoch": 0.00673, "grad_norm": 0.9905174678816935, "learning_rate": 0.002019, "loss": 5.0151, "step": 673 }, { "epoch": 0.00674, "grad_norm": 0.8930692065183647, "learning_rate": 0.0020220000000000004, "loss": 5.0014, "step": 674 }, { "epoch": 0.00675, "grad_norm": 1.0067908289229996, "learning_rate": 0.0020250000000000003, "loss": 5.0182, "step": 675 }, { "epoch": 0.00676, "grad_norm": 0.972209366764115, "learning_rate": 0.0020280000000000003, "loss": 5.0082, "step": 676 }, { "epoch": 0.00677, "grad_norm": 1.0440090424594235, "learning_rate": 0.0020310000000000003, "loss": 5.0109, "step": 677 }, { "epoch": 0.00678, "grad_norm": 1.1292207401563255, "learning_rate": 0.0020340000000000002, "loss": 5.0278, "step": 678 }, { "epoch": 0.00679, "grad_norm": 0.9872089241398577, "learning_rate": 0.002037, "loss": 5.0054, "step": 679 }, { "epoch": 0.0068, "grad_norm": 0.8748535789650627, "learning_rate": 0.00204, "loss": 5.0004, "step": 680 }, { "epoch": 0.00681, "grad_norm": 1.0039141852392888, "learning_rate": 0.002043, "loss": 5.0018, "step": 681 }, { "epoch": 0.00682, "grad_norm": 1.0414085118011525, "learning_rate": 0.002046, "loss": 5.0026, "step": 682 }, { "epoch": 0.00683, "grad_norm": 0.8083819733286619, "learning_rate": 0.002049, "loss": 4.9746, "step": 683 }, { "epoch": 0.00684, "grad_norm": 0.618682103661302, "learning_rate": 0.002052, "loss": 4.9803, "step": 684 }, { "epoch": 0.00685, "grad_norm": 0.5775508276061283, "learning_rate": 0.0020550000000000004, "loss": 4.9792, "step": 685 }, { "epoch": 0.00686, "grad_norm": 0.5359292821422027, "learning_rate": 0.0020580000000000004, "loss": 4.9508, "step": 686 }, { "epoch": 0.00687, "grad_norm": 0.5439874489161504, "learning_rate": 0.0020610000000000003, "loss": 4.9456, "step": 687 }, { "epoch": 0.00688, "grad_norm": 0.6521058673701751, "learning_rate": 0.002064, "loss": 4.9472, "step": 688 }, { "epoch": 0.00689, "grad_norm": 0.7201992210148584, "learning_rate": 0.002067, "loss": 4.9514, "step": 689 }, { "epoch": 0.0069, "grad_norm": 0.6329359289093167, "learning_rate": 0.00207, "loss": 4.9393, "step": 690 }, { "epoch": 0.00691, "grad_norm": 0.5452341441548267, "learning_rate": 0.0020729999999999998, "loss": 4.9065, "step": 691 }, { "epoch": 0.00692, "grad_norm": 0.524421270821715, "learning_rate": 0.0020759999999999997, "loss": 4.9264, "step": 692 }, { "epoch": 0.00693, "grad_norm": 0.5237198583423548, "learning_rate": 0.0020789999999999997, "loss": 4.9275, "step": 693 }, { "epoch": 0.00694, "grad_norm": 0.5943284166970615, "learning_rate": 0.002082, "loss": 4.9065, "step": 694 }, { "epoch": 0.00695, "grad_norm": 0.6331066584123409, "learning_rate": 0.002085, "loss": 4.9096, "step": 695 }, { "epoch": 0.00696, "grad_norm": 0.6552141599381052, "learning_rate": 0.002088, "loss": 4.9103, "step": 696 }, { "epoch": 0.00697, "grad_norm": 0.7459258980378775, "learning_rate": 0.002091, "loss": 4.9188, "step": 697 }, { "epoch": 0.00698, "grad_norm": 0.902413176094811, "learning_rate": 0.002094, "loss": 4.9109, "step": 698 }, { "epoch": 0.00699, "grad_norm": 0.9488326760888935, "learning_rate": 0.002097, "loss": 4.9116, "step": 699 }, { "epoch": 0.007, "grad_norm": 0.9189763967394823, "learning_rate": 0.0021, "loss": 4.929, "step": 700 }, { "epoch": 0.00701, "grad_norm": 0.8924436046521577, "learning_rate": 0.002103, "loss": 4.9083, "step": 701 }, { "epoch": 0.00702, "grad_norm": 0.8863614629240012, "learning_rate": 0.002106, "loss": 4.9092, "step": 702 }, { "epoch": 0.00703, "grad_norm": 0.7940709541538681, "learning_rate": 0.0021089999999999998, "loss": 4.891, "step": 703 }, { "epoch": 0.00704, "grad_norm": 0.7939787736751149, "learning_rate": 0.0021119999999999997, "loss": 4.8964, "step": 704 }, { "epoch": 0.00705, "grad_norm": 1.0444656030359551, "learning_rate": 0.002115, "loss": 4.9151, "step": 705 }, { "epoch": 0.00706, "grad_norm": 1.1431311909042268, "learning_rate": 0.002118, "loss": 4.9234, "step": 706 }, { "epoch": 0.00707, "grad_norm": 0.8384635334186645, "learning_rate": 0.002121, "loss": 4.8902, "step": 707 }, { "epoch": 0.00708, "grad_norm": 0.9594405079672866, "learning_rate": 0.002124, "loss": 4.8745, "step": 708 }, { "epoch": 0.00709, "grad_norm": 0.8900382869322284, "learning_rate": 0.002127, "loss": 4.9161, "step": 709 }, { "epoch": 0.0071, "grad_norm": 0.8868570415283396, "learning_rate": 0.00213, "loss": 4.9065, "step": 710 }, { "epoch": 0.00711, "grad_norm": 0.8610490944817158, "learning_rate": 0.002133, "loss": 4.8804, "step": 711 }, { "epoch": 0.00712, "grad_norm": 0.8727164938852855, "learning_rate": 0.002136, "loss": 4.9046, "step": 712 }, { "epoch": 0.00713, "grad_norm": 0.8857525308493206, "learning_rate": 0.002139, "loss": 4.9135, "step": 713 }, { "epoch": 0.00714, "grad_norm": 0.9495661806955594, "learning_rate": 0.002142, "loss": 4.918, "step": 714 }, { "epoch": 0.00715, "grad_norm": 1.0263024097609161, "learning_rate": 0.0021449999999999998, "loss": 4.8857, "step": 715 }, { "epoch": 0.00716, "grad_norm": 0.8876358680026493, "learning_rate": 0.002148, "loss": 4.8749, "step": 716 }, { "epoch": 0.00717, "grad_norm": 0.8225498605776377, "learning_rate": 0.002151, "loss": 4.8925, "step": 717 }, { "epoch": 0.00718, "grad_norm": 0.628552485265691, "learning_rate": 0.002154, "loss": 4.8659, "step": 718 }, { "epoch": 0.00719, "grad_norm": 0.6584104654465238, "learning_rate": 0.002157, "loss": 4.8747, "step": 719 }, { "epoch": 0.0072, "grad_norm": 0.6698592474865601, "learning_rate": 0.00216, "loss": 4.8635, "step": 720 }, { "epoch": 0.00721, "grad_norm": 0.6673590176314685, "learning_rate": 0.002163, "loss": 4.8639, "step": 721 }, { "epoch": 0.00722, "grad_norm": 0.6674098225397388, "learning_rate": 0.002166, "loss": 4.8386, "step": 722 }, { "epoch": 0.00723, "grad_norm": 0.6090726175552883, "learning_rate": 0.002169, "loss": 4.8464, "step": 723 }, { "epoch": 0.00724, "grad_norm": 0.6325507361418539, "learning_rate": 0.002172, "loss": 4.8403, "step": 724 }, { "epoch": 0.00725, "grad_norm": 0.6927587431932604, "learning_rate": 0.002175, "loss": 4.8341, "step": 725 }, { "epoch": 0.00726, "grad_norm": 0.7422551683158218, "learning_rate": 0.002178, "loss": 4.8448, "step": 726 }, { "epoch": 0.00727, "grad_norm": 0.7946686392459241, "learning_rate": 0.0021809999999999998, "loss": 4.8264, "step": 727 }, { "epoch": 0.00728, "grad_norm": 0.651194780867581, "learning_rate": 0.002184, "loss": 4.8373, "step": 728 }, { "epoch": 0.00729, "grad_norm": 0.5507866158426874, "learning_rate": 0.002187, "loss": 4.8279, "step": 729 }, { "epoch": 0.0073, "grad_norm": 0.5770531279665235, "learning_rate": 0.00219, "loss": 4.8256, "step": 730 }, { "epoch": 0.00731, "grad_norm": 0.6604772562967653, "learning_rate": 0.002193, "loss": 4.8198, "step": 731 }, { "epoch": 0.00732, "grad_norm": 0.7902754963422924, "learning_rate": 0.002196, "loss": 4.832, "step": 732 }, { "epoch": 0.00733, "grad_norm": 0.8530754775219535, "learning_rate": 0.002199, "loss": 4.814, "step": 733 }, { "epoch": 0.00734, "grad_norm": 0.808835939559151, "learning_rate": 0.002202, "loss": 4.8365, "step": 734 }, { "epoch": 0.00735, "grad_norm": 0.7793455630355729, "learning_rate": 0.002205, "loss": 4.8484, "step": 735 }, { "epoch": 0.00736, "grad_norm": 0.8899384636665534, "learning_rate": 0.002208, "loss": 4.8322, "step": 736 }, { "epoch": 0.00737, "grad_norm": 1.0166382867407526, "learning_rate": 0.002211, "loss": 4.8145, "step": 737 }, { "epoch": 0.00738, "grad_norm": 0.9857126416807526, "learning_rate": 0.002214, "loss": 4.8213, "step": 738 }, { "epoch": 0.00739, "grad_norm": 0.8982016923721464, "learning_rate": 0.0022170000000000002, "loss": 4.809, "step": 739 }, { "epoch": 0.0074, "grad_norm": 0.8517808259905928, "learning_rate": 0.00222, "loss": 4.8138, "step": 740 }, { "epoch": 0.00741, "grad_norm": 0.6571636698582773, "learning_rate": 0.002223, "loss": 4.778, "step": 741 }, { "epoch": 0.00742, "grad_norm": 0.5983530041008951, "learning_rate": 0.002226, "loss": 4.8043, "step": 742 }, { "epoch": 0.00743, "grad_norm": 0.613767022264535, "learning_rate": 0.002229, "loss": 4.772, "step": 743 }, { "epoch": 0.00744, "grad_norm": 0.5746332772613801, "learning_rate": 0.002232, "loss": 4.7755, "step": 744 }, { "epoch": 0.00745, "grad_norm": 0.5491916195482714, "learning_rate": 0.002235, "loss": 4.7792, "step": 745 }, { "epoch": 0.00746, "grad_norm": 0.6224704559098753, "learning_rate": 0.002238, "loss": 4.7785, "step": 746 }, { "epoch": 0.00747, "grad_norm": 0.721423929849636, "learning_rate": 0.002241, "loss": 4.766, "step": 747 }, { "epoch": 0.00748, "grad_norm": 0.8211263973312402, "learning_rate": 0.002244, "loss": 4.7825, "step": 748 }, { "epoch": 0.00749, "grad_norm": 0.9630311220772746, "learning_rate": 0.002247, "loss": 4.7687, "step": 749 }, { "epoch": 0.0075, "grad_norm": 1.0671208539128567, "learning_rate": 0.0022500000000000003, "loss": 4.8201, "step": 750 }, { "epoch": 0.00751, "grad_norm": 1.0285377005887373, "learning_rate": 0.0022530000000000002, "loss": 4.8053, "step": 751 }, { "epoch": 0.00752, "grad_norm": 0.9937387135055332, "learning_rate": 0.002256, "loss": 4.7635, "step": 752 }, { "epoch": 0.00753, "grad_norm": 0.8939099354397223, "learning_rate": 0.002259, "loss": 4.8048, "step": 753 }, { "epoch": 0.00754, "grad_norm": 0.958657384547811, "learning_rate": 0.002262, "loss": 4.816, "step": 754 }, { "epoch": 0.00755, "grad_norm": 0.8579165829387244, "learning_rate": 0.002265, "loss": 4.7809, "step": 755 }, { "epoch": 0.00756, "grad_norm": 0.8147493051985796, "learning_rate": 0.002268, "loss": 4.7687, "step": 756 }, { "epoch": 0.00757, "grad_norm": 0.9899674342411924, "learning_rate": 0.002271, "loss": 4.7845, "step": 757 }, { "epoch": 0.00758, "grad_norm": 1.2327208130150207, "learning_rate": 0.002274, "loss": 4.7967, "step": 758 }, { "epoch": 0.00759, "grad_norm": 0.8760473410677282, "learning_rate": 0.002277, "loss": 4.7951, "step": 759 }, { "epoch": 0.0076, "grad_norm": 0.9553393765090987, "learning_rate": 0.00228, "loss": 4.7842, "step": 760 }, { "epoch": 0.00761, "grad_norm": 1.088855928225056, "learning_rate": 0.002283, "loss": 4.769, "step": 761 }, { "epoch": 0.00762, "grad_norm": 0.9818480683263884, "learning_rate": 0.0022860000000000003, "loss": 4.7512, "step": 762 }, { "epoch": 0.00763, "grad_norm": 0.9731540924632093, "learning_rate": 0.0022890000000000002, "loss": 4.7931, "step": 763 }, { "epoch": 0.00764, "grad_norm": 1.0508884929557651, "learning_rate": 0.002292, "loss": 4.8167, "step": 764 }, { "epoch": 0.00765, "grad_norm": 1.0020863769727308, "learning_rate": 0.002295, "loss": 4.7984, "step": 765 }, { "epoch": 0.00766, "grad_norm": 1.1527463652354557, "learning_rate": 0.002298, "loss": 4.8085, "step": 766 }, { "epoch": 0.00767, "grad_norm": 0.9657952239159258, "learning_rate": 0.002301, "loss": 4.7959, "step": 767 }, { "epoch": 0.00768, "grad_norm": 1.0234917976922082, "learning_rate": 0.002304, "loss": 4.8012, "step": 768 }, { "epoch": 0.00769, "grad_norm": 0.9850893067060651, "learning_rate": 0.002307, "loss": 4.8144, "step": 769 }, { "epoch": 0.0077, "grad_norm": 0.9062134932024389, "learning_rate": 0.00231, "loss": 4.7653, "step": 770 }, { "epoch": 0.00771, "grad_norm": 0.8476285286232204, "learning_rate": 0.002313, "loss": 4.7979, "step": 771 }, { "epoch": 0.00772, "grad_norm": 0.9122213123018311, "learning_rate": 0.002316, "loss": 4.7851, "step": 772 }, { "epoch": 0.00773, "grad_norm": 1.0718910624781612, "learning_rate": 0.0023190000000000003, "loss": 4.8052, "step": 773 }, { "epoch": 0.00774, "grad_norm": 0.7792131883523417, "learning_rate": 0.0023220000000000003, "loss": 4.7945, "step": 774 }, { "epoch": 0.00775, "grad_norm": 0.7995411986928386, "learning_rate": 0.0023250000000000002, "loss": 4.7914, "step": 775 }, { "epoch": 0.00776, "grad_norm": 0.7054590225014301, "learning_rate": 0.002328, "loss": 4.7883, "step": 776 }, { "epoch": 0.00777, "grad_norm": 0.6505869359405926, "learning_rate": 0.002331, "loss": 4.7585, "step": 777 }, { "epoch": 0.00778, "grad_norm": 0.6484695284206986, "learning_rate": 0.002334, "loss": 4.7652, "step": 778 }, { "epoch": 0.00779, "grad_norm": 0.6047799586124271, "learning_rate": 0.002337, "loss": 4.7239, "step": 779 }, { "epoch": 0.0078, "grad_norm": 0.5436502526586032, "learning_rate": 0.00234, "loss": 4.7364, "step": 780 }, { "epoch": 0.00781, "grad_norm": 0.5682167623371829, "learning_rate": 0.002343, "loss": 4.7355, "step": 781 }, { "epoch": 0.00782, "grad_norm": 0.628910610744215, "learning_rate": 0.002346, "loss": 4.7356, "step": 782 }, { "epoch": 0.00783, "grad_norm": 0.5845457532196663, "learning_rate": 0.002349, "loss": 4.7086, "step": 783 }, { "epoch": 0.00784, "grad_norm": 0.6345965495249546, "learning_rate": 0.002352, "loss": 4.723, "step": 784 }, { "epoch": 0.00785, "grad_norm": 0.4959017064429773, "learning_rate": 0.0023550000000000003, "loss": 4.7138, "step": 785 }, { "epoch": 0.00786, "grad_norm": 0.4358915945164792, "learning_rate": 0.0023580000000000003, "loss": 4.7131, "step": 786 }, { "epoch": 0.00787, "grad_norm": 0.4259891850743534, "learning_rate": 0.0023610000000000003, "loss": 4.7113, "step": 787 }, { "epoch": 0.00788, "grad_norm": 0.4529949912379573, "learning_rate": 0.002364, "loss": 4.7077, "step": 788 }, { "epoch": 0.00789, "grad_norm": 0.4944753699230628, "learning_rate": 0.002367, "loss": 4.7334, "step": 789 }, { "epoch": 0.0079, "grad_norm": 0.6295031827770177, "learning_rate": 0.00237, "loss": 4.7144, "step": 790 }, { "epoch": 0.00791, "grad_norm": 0.8474014146600959, "learning_rate": 0.002373, "loss": 4.6777, "step": 791 }, { "epoch": 0.00792, "grad_norm": 0.8040402155973354, "learning_rate": 0.002376, "loss": 4.7267, "step": 792 }, { "epoch": 0.00793, "grad_norm": 0.5568568781614048, "learning_rate": 0.002379, "loss": 4.7006, "step": 793 }, { "epoch": 0.00794, "grad_norm": 0.8220014797505664, "learning_rate": 0.002382, "loss": 4.7246, "step": 794 }, { "epoch": 0.00795, "grad_norm": 0.6551332917875898, "learning_rate": 0.002385, "loss": 4.6938, "step": 795 }, { "epoch": 0.00796, "grad_norm": 0.5666814801389223, "learning_rate": 0.0023880000000000004, "loss": 4.6826, "step": 796 }, { "epoch": 0.00797, "grad_norm": 0.654430341218369, "learning_rate": 0.0023910000000000003, "loss": 4.7248, "step": 797 }, { "epoch": 0.00798, "grad_norm": 0.5511512463730408, "learning_rate": 0.0023940000000000003, "loss": 4.6864, "step": 798 }, { "epoch": 0.00799, "grad_norm": 0.5084174359945534, "learning_rate": 0.0023970000000000003, "loss": 4.7161, "step": 799 }, { "epoch": 0.008, "grad_norm": 0.41968398472595075, "learning_rate": 0.0024000000000000002, "loss": 4.6652, "step": 800 }, { "epoch": 0.00801, "grad_norm": 0.4546309911468048, "learning_rate": 0.002403, "loss": 4.6967, "step": 801 }, { "epoch": 0.00802, "grad_norm": 0.430018994115786, "learning_rate": 0.002406, "loss": 4.6827, "step": 802 }, { "epoch": 0.00803, "grad_norm": 0.44614830716967085, "learning_rate": 0.002409, "loss": 4.6617, "step": 803 }, { "epoch": 0.00804, "grad_norm": 0.44722400844593674, "learning_rate": 0.002412, "loss": 4.6598, "step": 804 }, { "epoch": 0.00805, "grad_norm": 0.5179127215582825, "learning_rate": 0.002415, "loss": 4.6599, "step": 805 }, { "epoch": 0.00806, "grad_norm": 0.5610832008078775, "learning_rate": 0.002418, "loss": 4.677, "step": 806 }, { "epoch": 0.00807, "grad_norm": 0.5167453223410896, "learning_rate": 0.0024210000000000004, "loss": 4.6671, "step": 807 }, { "epoch": 0.00808, "grad_norm": 0.46468933196331563, "learning_rate": 0.0024240000000000004, "loss": 4.6511, "step": 808 }, { "epoch": 0.00809, "grad_norm": 0.5221883532574668, "learning_rate": 0.0024270000000000003, "loss": 4.6468, "step": 809 }, { "epoch": 0.0081, "grad_norm": 0.4992566900849729, "learning_rate": 0.0024300000000000003, "loss": 4.6744, "step": 810 }, { "epoch": 0.00811, "grad_norm": 0.4854147467055134, "learning_rate": 0.0024330000000000003, "loss": 4.646, "step": 811 }, { "epoch": 0.00812, "grad_norm": 0.650970729431075, "learning_rate": 0.0024360000000000002, "loss": 4.6307, "step": 812 }, { "epoch": 0.00813, "grad_norm": 0.8160691589494683, "learning_rate": 0.0024389999999999998, "loss": 4.6711, "step": 813 }, { "epoch": 0.00814, "grad_norm": 0.9918101747931352, "learning_rate": 0.0024419999999999997, "loss": 4.6946, "step": 814 }, { "epoch": 0.00815, "grad_norm": 1.247963175893729, "learning_rate": 0.0024449999999999997, "loss": 4.7226, "step": 815 }, { "epoch": 0.00816, "grad_norm": 0.8376200515557375, "learning_rate": 0.002448, "loss": 4.6777, "step": 816 }, { "epoch": 0.00817, "grad_norm": 0.9161032619759178, "learning_rate": 0.002451, "loss": 4.6939, "step": 817 }, { "epoch": 0.00818, "grad_norm": 1.0914649908014256, "learning_rate": 0.002454, "loss": 4.6886, "step": 818 }, { "epoch": 0.00819, "grad_norm": 0.9806171410774952, "learning_rate": 0.002457, "loss": 4.712, "step": 819 }, { "epoch": 0.0082, "grad_norm": 0.992236077471004, "learning_rate": 0.00246, "loss": 4.6918, "step": 820 }, { "epoch": 0.00821, "grad_norm": 1.0594557870263281, "learning_rate": 0.002463, "loss": 4.6759, "step": 821 }, { "epoch": 0.00822, "grad_norm": 1.0346800919438124, "learning_rate": 0.002466, "loss": 4.6853, "step": 822 }, { "epoch": 0.00823, "grad_norm": 0.9573573191186882, "learning_rate": 0.002469, "loss": 4.6833, "step": 823 }, { "epoch": 0.00824, "grad_norm": 1.1123514933123841, "learning_rate": 0.002472, "loss": 4.714, "step": 824 }, { "epoch": 0.00825, "grad_norm": 0.8463845700248506, "learning_rate": 0.0024749999999999998, "loss": 4.7191, "step": 825 }, { "epoch": 0.00826, "grad_norm": 0.8444785606085857, "learning_rate": 0.0024779999999999997, "loss": 4.672, "step": 826 }, { "epoch": 0.00827, "grad_norm": 0.9726341870117121, "learning_rate": 0.002481, "loss": 4.7078, "step": 827 }, { "epoch": 0.00828, "grad_norm": 0.9106448417621353, "learning_rate": 0.002484, "loss": 4.7003, "step": 828 }, { "epoch": 0.00829, "grad_norm": 0.7565680418878746, "learning_rate": 0.002487, "loss": 4.6856, "step": 829 }, { "epoch": 0.0083, "grad_norm": 0.8537774465977133, "learning_rate": 0.00249, "loss": 4.7017, "step": 830 }, { "epoch": 0.00831, "grad_norm": 0.9023323948099834, "learning_rate": 0.002493, "loss": 4.6871, "step": 831 }, { "epoch": 0.00832, "grad_norm": 0.8524529451127855, "learning_rate": 0.002496, "loss": 4.6815, "step": 832 }, { "epoch": 0.00833, "grad_norm": 0.9428655185832147, "learning_rate": 0.002499, "loss": 4.6808, "step": 833 }, { "epoch": 0.00834, "grad_norm": 0.9597220185428569, "learning_rate": 0.002502, "loss": 4.6879, "step": 834 }, { "epoch": 0.00835, "grad_norm": 0.7735101632354252, "learning_rate": 0.002505, "loss": 4.6724, "step": 835 }, { "epoch": 0.00836, "grad_norm": 0.9597202731139803, "learning_rate": 0.002508, "loss": 4.7061, "step": 836 }, { "epoch": 0.00837, "grad_norm": 0.9520863539431935, "learning_rate": 0.0025109999999999998, "loss": 4.6636, "step": 837 }, { "epoch": 0.00838, "grad_norm": 0.7800128524395746, "learning_rate": 0.0025139999999999997, "loss": 4.6721, "step": 838 }, { "epoch": 0.00839, "grad_norm": 0.8122589832425033, "learning_rate": 0.002517, "loss": 4.675, "step": 839 }, { "epoch": 0.0084, "grad_norm": 0.8183344402395425, "learning_rate": 0.00252, "loss": 4.6669, "step": 840 }, { "epoch": 0.00841, "grad_norm": 0.6918735110390536, "learning_rate": 0.002523, "loss": 4.6489, "step": 841 }, { "epoch": 0.00842, "grad_norm": 0.6201385747244391, "learning_rate": 0.002526, "loss": 4.6423, "step": 842 }, { "epoch": 0.00843, "grad_norm": 0.606127970479136, "learning_rate": 0.002529, "loss": 4.6465, "step": 843 }, { "epoch": 0.00844, "grad_norm": 0.5515773209874846, "learning_rate": 0.002532, "loss": 4.6607, "step": 844 }, { "epoch": 0.00845, "grad_norm": 0.6203742299859808, "learning_rate": 0.002535, "loss": 4.6293, "step": 845 }, { "epoch": 0.00846, "grad_norm": 0.5875832865020281, "learning_rate": 0.002538, "loss": 4.6474, "step": 846 }, { "epoch": 0.00847, "grad_norm": 0.5703256353430879, "learning_rate": 0.002541, "loss": 4.6282, "step": 847 }, { "epoch": 0.00848, "grad_norm": 0.602830367643936, "learning_rate": 0.002544, "loss": 4.6269, "step": 848 }, { "epoch": 0.00849, "grad_norm": 0.6741507039909044, "learning_rate": 0.002547, "loss": 4.6233, "step": 849 }, { "epoch": 0.0085, "grad_norm": 0.6288739006540759, "learning_rate": 0.00255, "loss": 4.6341, "step": 850 }, { "epoch": 0.00851, "grad_norm": 0.5820099008678455, "learning_rate": 0.002553, "loss": 4.644, "step": 851 }, { "epoch": 0.00852, "grad_norm": 0.586123912558797, "learning_rate": 0.002556, "loss": 4.6367, "step": 852 }, { "epoch": 0.00853, "grad_norm": 0.5127813487098001, "learning_rate": 0.002559, "loss": 4.6085, "step": 853 }, { "epoch": 0.00854, "grad_norm": 0.4730499644759358, "learning_rate": 0.002562, "loss": 4.6029, "step": 854 }, { "epoch": 0.00855, "grad_norm": 0.44708869980986227, "learning_rate": 0.002565, "loss": 4.5799, "step": 855 }, { "epoch": 0.00856, "grad_norm": 0.466044480858233, "learning_rate": 0.002568, "loss": 4.6142, "step": 856 }, { "epoch": 0.00857, "grad_norm": 0.5382201915945353, "learning_rate": 0.002571, "loss": 4.6036, "step": 857 }, { "epoch": 0.00858, "grad_norm": 0.6780662034295477, "learning_rate": 0.002574, "loss": 4.609, "step": 858 }, { "epoch": 0.00859, "grad_norm": 0.9086610382483981, "learning_rate": 0.002577, "loss": 4.6039, "step": 859 }, { "epoch": 0.0086, "grad_norm": 0.8563688949272525, "learning_rate": 0.00258, "loss": 4.6531, "step": 860 }, { "epoch": 0.00861, "grad_norm": 0.5965670098126366, "learning_rate": 0.0025830000000000002, "loss": 4.6461, "step": 861 }, { "epoch": 0.00862, "grad_norm": 0.7975052365958228, "learning_rate": 0.002586, "loss": 4.611, "step": 862 }, { "epoch": 0.00863, "grad_norm": 0.650099032572018, "learning_rate": 0.002589, "loss": 4.6284, "step": 863 }, { "epoch": 0.00864, "grad_norm": 0.6277114763068243, "learning_rate": 0.002592, "loss": 4.5809, "step": 864 }, { "epoch": 0.00865, "grad_norm": 0.7499269309750987, "learning_rate": 0.002595, "loss": 4.6024, "step": 865 }, { "epoch": 0.00866, "grad_norm": 0.7085973518403954, "learning_rate": 0.002598, "loss": 4.6233, "step": 866 }, { "epoch": 0.00867, "grad_norm": 0.605538925445329, "learning_rate": 0.002601, "loss": 4.6101, "step": 867 }, { "epoch": 0.00868, "grad_norm": 0.48200776305054654, "learning_rate": 0.002604, "loss": 4.586, "step": 868 }, { "epoch": 0.00869, "grad_norm": 0.5266950965425763, "learning_rate": 0.002607, "loss": 4.5966, "step": 869 }, { "epoch": 0.0087, "grad_norm": 0.48953699231607295, "learning_rate": 0.00261, "loss": 4.5832, "step": 870 }, { "epoch": 0.00871, "grad_norm": 0.5478274928438833, "learning_rate": 0.002613, "loss": 4.5992, "step": 871 }, { "epoch": 0.00872, "grad_norm": 0.6263670684952429, "learning_rate": 0.002616, "loss": 4.5864, "step": 872 }, { "epoch": 0.00873, "grad_norm": 0.6672951132542, "learning_rate": 0.0026190000000000002, "loss": 4.5977, "step": 873 }, { "epoch": 0.00874, "grad_norm": 0.677096167715366, "learning_rate": 0.002622, "loss": 4.617, "step": 874 }, { "epoch": 0.00875, "grad_norm": 0.6959913524482387, "learning_rate": 0.002625, "loss": 4.5696, "step": 875 }, { "epoch": 0.00876, "grad_norm": 0.6682762743495083, "learning_rate": 0.002628, "loss": 4.5958, "step": 876 }, { "epoch": 0.00877, "grad_norm": 0.6375266502117092, "learning_rate": 0.002631, "loss": 4.612, "step": 877 }, { "epoch": 0.00878, "grad_norm": 0.7079418482290942, "learning_rate": 0.002634, "loss": 4.5486, "step": 878 }, { "epoch": 0.00879, "grad_norm": 0.6282689223941402, "learning_rate": 0.002637, "loss": 4.578, "step": 879 }, { "epoch": 0.0088, "grad_norm": 0.5460943252882049, "learning_rate": 0.00264, "loss": 4.5852, "step": 880 }, { "epoch": 0.00881, "grad_norm": 0.5723972494402886, "learning_rate": 0.002643, "loss": 4.5869, "step": 881 }, { "epoch": 0.00882, "grad_norm": 0.647818443655113, "learning_rate": 0.002646, "loss": 4.588, "step": 882 }, { "epoch": 0.00883, "grad_norm": 0.8827068805337381, "learning_rate": 0.002649, "loss": 4.5935, "step": 883 }, { "epoch": 0.00884, "grad_norm": 1.3000201706023533, "learning_rate": 0.0026520000000000003, "loss": 4.6052, "step": 884 }, { "epoch": 0.00885, "grad_norm": 0.7527768384442359, "learning_rate": 0.0026550000000000002, "loss": 4.5797, "step": 885 }, { "epoch": 0.00886, "grad_norm": 0.7313595200920677, "learning_rate": 0.002658, "loss": 4.6019, "step": 886 }, { "epoch": 0.00887, "grad_norm": 0.553327654847044, "learning_rate": 0.002661, "loss": 4.5828, "step": 887 }, { "epoch": 0.00888, "grad_norm": 0.6064219625843388, "learning_rate": 0.002664, "loss": 4.5894, "step": 888 }, { "epoch": 0.00889, "grad_norm": 0.6392357596846293, "learning_rate": 0.002667, "loss": 4.5422, "step": 889 }, { "epoch": 0.0089, "grad_norm": 0.6860123914477424, "learning_rate": 0.00267, "loss": 4.5989, "step": 890 }, { "epoch": 0.00891, "grad_norm": 0.7088960904364014, "learning_rate": 0.002673, "loss": 4.5822, "step": 891 }, { "epoch": 0.00892, "grad_norm": 0.7157207147763361, "learning_rate": 0.002676, "loss": 4.5934, "step": 892 }, { "epoch": 0.00893, "grad_norm": 0.7412527752908875, "learning_rate": 0.002679, "loss": 4.5709, "step": 893 }, { "epoch": 0.00894, "grad_norm": 0.8084836835989728, "learning_rate": 0.002682, "loss": 4.5639, "step": 894 }, { "epoch": 0.00895, "grad_norm": 0.9923307111818513, "learning_rate": 0.0026850000000000003, "loss": 4.5864, "step": 895 }, { "epoch": 0.00896, "grad_norm": 1.2171682577354312, "learning_rate": 0.0026880000000000003, "loss": 4.6057, "step": 896 }, { "epoch": 0.00897, "grad_norm": 0.797478427208377, "learning_rate": 0.0026910000000000002, "loss": 4.5989, "step": 897 }, { "epoch": 0.00898, "grad_norm": 0.7928728804117916, "learning_rate": 0.002694, "loss": 4.594, "step": 898 }, { "epoch": 0.00899, "grad_norm": 0.8357403035452178, "learning_rate": 0.002697, "loss": 4.5983, "step": 899 }, { "epoch": 0.009, "grad_norm": 0.8448290091163538, "learning_rate": 0.0027, "loss": 4.6292, "step": 900 }, { "epoch": 0.00901, "grad_norm": 0.9488092229670547, "learning_rate": 0.002703, "loss": 4.5868, "step": 901 }, { "epoch": 0.00902, "grad_norm": 0.9434404658743749, "learning_rate": 0.002706, "loss": 4.5999, "step": 902 }, { "epoch": 0.00903, "grad_norm": 1.0122099567822476, "learning_rate": 0.002709, "loss": 4.6102, "step": 903 }, { "epoch": 0.00904, "grad_norm": 0.9358691681287052, "learning_rate": 0.002712, "loss": 4.5848, "step": 904 }, { "epoch": 0.00905, "grad_norm": 0.8321510442485943, "learning_rate": 0.002715, "loss": 4.5984, "step": 905 }, { "epoch": 0.00906, "grad_norm": 0.8914473393947665, "learning_rate": 0.002718, "loss": 4.6112, "step": 906 }, { "epoch": 0.00907, "grad_norm": 0.9883982303638487, "learning_rate": 0.0027210000000000003, "loss": 4.6386, "step": 907 }, { "epoch": 0.00908, "grad_norm": 0.86073203349026, "learning_rate": 0.0027240000000000003, "loss": 4.6116, "step": 908 }, { "epoch": 0.00909, "grad_norm": 0.7773747412069614, "learning_rate": 0.0027270000000000003, "loss": 4.6163, "step": 909 }, { "epoch": 0.0091, "grad_norm": 0.7370585718531062, "learning_rate": 0.0027300000000000002, "loss": 4.6234, "step": 910 }, { "epoch": 0.00911, "grad_norm": 0.6906269071273593, "learning_rate": 0.002733, "loss": 4.5785, "step": 911 }, { "epoch": 0.00912, "grad_norm": 0.6578032292778252, "learning_rate": 0.002736, "loss": 4.5778, "step": 912 }, { "epoch": 0.00913, "grad_norm": 0.6528626059582382, "learning_rate": 0.002739, "loss": 4.5704, "step": 913 }, { "epoch": 0.00914, "grad_norm": 0.599731896856576, "learning_rate": 0.002742, "loss": 4.595, "step": 914 }, { "epoch": 0.00915, "grad_norm": 0.5922054086035364, "learning_rate": 0.002745, "loss": 4.5555, "step": 915 }, { "epoch": 0.00916, "grad_norm": 0.528646140228931, "learning_rate": 0.002748, "loss": 4.5304, "step": 916 }, { "epoch": 0.00917, "grad_norm": 0.5305158198561161, "learning_rate": 0.002751, "loss": 4.5419, "step": 917 }, { "epoch": 0.00918, "grad_norm": 0.4736382884122071, "learning_rate": 0.0027540000000000004, "loss": 4.5569, "step": 918 }, { "epoch": 0.00919, "grad_norm": 0.45838817911808083, "learning_rate": 0.0027570000000000003, "loss": 4.5357, "step": 919 }, { "epoch": 0.0092, "grad_norm": 0.4361472184478695, "learning_rate": 0.0027600000000000003, "loss": 4.5493, "step": 920 }, { "epoch": 0.00921, "grad_norm": 0.43931126757575867, "learning_rate": 0.0027630000000000003, "loss": 4.5336, "step": 921 }, { "epoch": 0.00922, "grad_norm": 0.46920285200840567, "learning_rate": 0.0027660000000000002, "loss": 4.5412, "step": 922 }, { "epoch": 0.00923, "grad_norm": 0.5996209745093354, "learning_rate": 0.002769, "loss": 4.5218, "step": 923 }, { "epoch": 0.00924, "grad_norm": 0.702528530852281, "learning_rate": 0.002772, "loss": 4.5564, "step": 924 }, { "epoch": 0.00925, "grad_norm": 0.6651779892024297, "learning_rate": 0.002775, "loss": 4.5288, "step": 925 }, { "epoch": 0.00926, "grad_norm": 0.5723855300218565, "learning_rate": 0.002778, "loss": 4.5426, "step": 926 }, { "epoch": 0.00927, "grad_norm": 0.6080148945846373, "learning_rate": 0.002781, "loss": 4.5043, "step": 927 }, { "epoch": 0.00928, "grad_norm": 0.6957035289314476, "learning_rate": 0.002784, "loss": 4.5502, "step": 928 }, { "epoch": 0.00929, "grad_norm": 0.7366634335117053, "learning_rate": 0.0027870000000000004, "loss": 4.5403, "step": 929 }, { "epoch": 0.0093, "grad_norm": 0.7796102692352119, "learning_rate": 0.0027900000000000004, "loss": 4.5265, "step": 930 }, { "epoch": 0.00931, "grad_norm": 0.7116909602884058, "learning_rate": 0.0027930000000000003, "loss": 4.5333, "step": 931 }, { "epoch": 0.00932, "grad_norm": 0.8319854024158545, "learning_rate": 0.0027960000000000003, "loss": 4.5481, "step": 932 }, { "epoch": 0.00933, "grad_norm": 0.8001418279766108, "learning_rate": 0.0027990000000000003, "loss": 4.5286, "step": 933 }, { "epoch": 0.00934, "grad_norm": 0.6519619232143173, "learning_rate": 0.0028020000000000002, "loss": 4.5422, "step": 934 }, { "epoch": 0.00935, "grad_norm": 0.7580737482550882, "learning_rate": 0.002805, "loss": 4.5559, "step": 935 }, { "epoch": 0.00936, "grad_norm": 0.8499038627491867, "learning_rate": 0.002808, "loss": 4.5679, "step": 936 }, { "epoch": 0.00937, "grad_norm": 0.7597302495348821, "learning_rate": 0.002811, "loss": 4.5708, "step": 937 }, { "epoch": 0.00938, "grad_norm": 0.9878821641788273, "learning_rate": 0.002814, "loss": 4.5609, "step": 938 }, { "epoch": 0.00939, "grad_norm": 0.9691729918808772, "learning_rate": 0.002817, "loss": 4.563, "step": 939 }, { "epoch": 0.0094, "grad_norm": 0.8937843559478598, "learning_rate": 0.00282, "loss": 4.56, "step": 940 }, { "epoch": 0.00941, "grad_norm": 0.9477839045288606, "learning_rate": 0.002823, "loss": 4.5409, "step": 941 }, { "epoch": 0.00942, "grad_norm": 1.088560613057821, "learning_rate": 0.002826, "loss": 4.5819, "step": 942 }, { "epoch": 0.00943, "grad_norm": 0.8020128186220904, "learning_rate": 0.002829, "loss": 4.556, "step": 943 }, { "epoch": 0.00944, "grad_norm": 0.7970499406732843, "learning_rate": 0.002832, "loss": 4.5652, "step": 944 }, { "epoch": 0.00945, "grad_norm": 0.760430287307007, "learning_rate": 0.002835, "loss": 4.567, "step": 945 }, { "epoch": 0.00946, "grad_norm": 0.8410168172764453, "learning_rate": 0.002838, "loss": 4.5808, "step": 946 }, { "epoch": 0.00947, "grad_norm": 0.8502364092306604, "learning_rate": 0.0028409999999999998, "loss": 4.5581, "step": 947 }, { "epoch": 0.00948, "grad_norm": 0.7534324730199542, "learning_rate": 0.0028439999999999997, "loss": 4.533, "step": 948 }, { "epoch": 0.00949, "grad_norm": 0.8075715283027973, "learning_rate": 0.002847, "loss": 4.5789, "step": 949 }, { "epoch": 0.0095, "grad_norm": 0.8790685187514339, "learning_rate": 0.00285, "loss": 4.5764, "step": 950 }, { "epoch": 0.00951, "grad_norm": 0.8527621336415785, "learning_rate": 0.002853, "loss": 4.552, "step": 951 }, { "epoch": 0.00952, "grad_norm": 0.793648162843131, "learning_rate": 0.002856, "loss": 4.5771, "step": 952 }, { "epoch": 0.00953, "grad_norm": 0.7100823051409002, "learning_rate": 0.002859, "loss": 4.5151, "step": 953 }, { "epoch": 0.00954, "grad_norm": 0.776086010454581, "learning_rate": 0.002862, "loss": 4.5748, "step": 954 }, { "epoch": 0.00955, "grad_norm": 0.7357834745016256, "learning_rate": 0.002865, "loss": 4.5651, "step": 955 }, { "epoch": 0.00956, "grad_norm": 0.6871788604084053, "learning_rate": 0.002868, "loss": 4.5378, "step": 956 }, { "epoch": 0.00957, "grad_norm": 0.6293704920093642, "learning_rate": 0.002871, "loss": 4.5585, "step": 957 }, { "epoch": 0.00958, "grad_norm": 0.6933721545151298, "learning_rate": 0.002874, "loss": 4.5402, "step": 958 }, { "epoch": 0.00959, "grad_norm": 0.6216290945191316, "learning_rate": 0.002877, "loss": 4.5294, "step": 959 }, { "epoch": 0.0096, "grad_norm": 0.44090482568449035, "learning_rate": 0.0028799999999999997, "loss": 4.5205, "step": 960 }, { "epoch": 0.00961, "grad_norm": 0.5026549244936088, "learning_rate": 0.002883, "loss": 4.4973, "step": 961 }, { "epoch": 0.00962, "grad_norm": 0.46550744372429714, "learning_rate": 0.002886, "loss": 4.5199, "step": 962 }, { "epoch": 0.00963, "grad_norm": 0.4817462883709995, "learning_rate": 0.002889, "loss": 4.5204, "step": 963 }, { "epoch": 0.00964, "grad_norm": 0.5021989893794454, "learning_rate": 0.002892, "loss": 4.5105, "step": 964 }, { "epoch": 0.00965, "grad_norm": 0.6331237702649058, "learning_rate": 0.002895, "loss": 4.4888, "step": 965 }, { "epoch": 0.00966, "grad_norm": 0.7186463225121739, "learning_rate": 0.002898, "loss": 4.5122, "step": 966 }, { "epoch": 0.00967, "grad_norm": 0.835541291398658, "learning_rate": 0.002901, "loss": 4.5497, "step": 967 }, { "epoch": 0.00968, "grad_norm": 0.7770950591910699, "learning_rate": 0.002904, "loss": 4.5246, "step": 968 }, { "epoch": 0.00969, "grad_norm": 0.6421972738290654, "learning_rate": 0.002907, "loss": 4.5465, "step": 969 }, { "epoch": 0.0097, "grad_norm": 0.6170493579190435, "learning_rate": 0.00291, "loss": 4.5014, "step": 970 }, { "epoch": 0.00971, "grad_norm": 0.671893763395282, "learning_rate": 0.002913, "loss": 4.5134, "step": 971 }, { "epoch": 0.00972, "grad_norm": 0.5384349268117217, "learning_rate": 0.002916, "loss": 4.51, "step": 972 }, { "epoch": 0.00973, "grad_norm": 0.6570052261370841, "learning_rate": 0.002919, "loss": 4.5075, "step": 973 }, { "epoch": 0.00974, "grad_norm": 0.6469437996214488, "learning_rate": 0.002922, "loss": 4.5042, "step": 974 }, { "epoch": 0.00975, "grad_norm": 0.5139434995269291, "learning_rate": 0.002925, "loss": 4.5141, "step": 975 }, { "epoch": 0.00976, "grad_norm": 0.540350404123188, "learning_rate": 0.002928, "loss": 4.4984, "step": 976 }, { "epoch": 0.00977, "grad_norm": 0.5640158884340003, "learning_rate": 0.002931, "loss": 4.5359, "step": 977 }, { "epoch": 0.00978, "grad_norm": 0.5471232322596488, "learning_rate": 0.002934, "loss": 4.5069, "step": 978 }, { "epoch": 0.00979, "grad_norm": 0.6391692840311302, "learning_rate": 0.002937, "loss": 4.4972, "step": 979 }, { "epoch": 0.0098, "grad_norm": 0.7065424241899814, "learning_rate": 0.00294, "loss": 4.4963, "step": 980 }, { "epoch": 0.00981, "grad_norm": 0.6903724510426201, "learning_rate": 0.002943, "loss": 4.5078, "step": 981 }, { "epoch": 0.00982, "grad_norm": 0.653302049548968, "learning_rate": 0.002946, "loss": 4.4991, "step": 982 }, { "epoch": 0.00983, "grad_norm": 0.7786836590229197, "learning_rate": 0.0029490000000000002, "loss": 4.5061, "step": 983 }, { "epoch": 0.00984, "grad_norm": 0.8944946184941699, "learning_rate": 0.002952, "loss": 4.5043, "step": 984 }, { "epoch": 0.00985, "grad_norm": 0.9434362337974719, "learning_rate": 0.002955, "loss": 4.5335, "step": 985 }, { "epoch": 0.00986, "grad_norm": 0.9312010498282314, "learning_rate": 0.002958, "loss": 4.4908, "step": 986 }, { "epoch": 0.00987, "grad_norm": 0.7600059379317153, "learning_rate": 0.002961, "loss": 4.5038, "step": 987 }, { "epoch": 0.00988, "grad_norm": 0.6949983731085813, "learning_rate": 0.002964, "loss": 4.533, "step": 988 }, { "epoch": 0.00989, "grad_norm": 0.6958207764457225, "learning_rate": 0.002967, "loss": 4.4744, "step": 989 }, { "epoch": 0.0099, "grad_norm": 0.8314159874496235, "learning_rate": 0.00297, "loss": 4.5218, "step": 990 }, { "epoch": 0.00991, "grad_norm": 0.90109982550625, "learning_rate": 0.002973, "loss": 4.5224, "step": 991 }, { "epoch": 0.00992, "grad_norm": 0.951208720196979, "learning_rate": 0.002976, "loss": 4.5281, "step": 992 }, { "epoch": 0.00993, "grad_norm": 1.034654706401041, "learning_rate": 0.002979, "loss": 4.539, "step": 993 }, { "epoch": 0.00994, "grad_norm": 1.0078120294806783, "learning_rate": 0.002982, "loss": 4.5459, "step": 994 }, { "epoch": 0.00995, "grad_norm": 0.9905056664043064, "learning_rate": 0.0029850000000000002, "loss": 4.5461, "step": 995 }, { "epoch": 0.00996, "grad_norm": 1.1603429750232241, "learning_rate": 0.002988, "loss": 4.5605, "step": 996 }, { "epoch": 0.00997, "grad_norm": 0.8595677567729485, "learning_rate": 0.002991, "loss": 4.5366, "step": 997 }, { "epoch": 0.00998, "grad_norm": 0.9672785170741734, "learning_rate": 0.002994, "loss": 4.5467, "step": 998 }, { "epoch": 0.00999, "grad_norm": 0.8855476122040005, "learning_rate": 0.002997, "loss": 4.5306, "step": 999 }, { "epoch": 0.01, "grad_norm": 0.7479993027173912, "learning_rate": 0.003, "loss": 4.5436, "step": 1000 }, { "epoch": 0.01001, "grad_norm": 0.7121555496393158, "learning_rate": 0.003, "loss": 4.5401, "step": 1001 }, { "epoch": 0.01002, "grad_norm": 0.72505396659877, "learning_rate": 0.003, "loss": 4.5099, "step": 1002 }, { "epoch": 0.01003, "grad_norm": 0.7482409624375888, "learning_rate": 0.003, "loss": 4.5346, "step": 1003 }, { "epoch": 0.01004, "grad_norm": 0.5922239695560899, "learning_rate": 0.003, "loss": 4.513, "step": 1004 }, { "epoch": 0.01005, "grad_norm": 0.7163386651042714, "learning_rate": 0.003, "loss": 4.4897, "step": 1005 }, { "epoch": 0.01006, "grad_norm": 0.7664341729228217, "learning_rate": 0.003, "loss": 4.4943, "step": 1006 }, { "epoch": 0.01007, "grad_norm": 0.6325498197242884, "learning_rate": 0.003, "loss": 4.5316, "step": 1007 }, { "epoch": 0.01008, "grad_norm": 0.5334181570982156, "learning_rate": 0.003, "loss": 4.5325, "step": 1008 }, { "epoch": 0.01009, "grad_norm": 0.4756687788935663, "learning_rate": 0.003, "loss": 4.4998, "step": 1009 }, { "epoch": 0.0101, "grad_norm": 0.44081077367096977, "learning_rate": 0.003, "loss": 4.5025, "step": 1010 }, { "epoch": 0.01011, "grad_norm": 0.4706704982402646, "learning_rate": 0.003, "loss": 4.5082, "step": 1011 }, { "epoch": 0.01012, "grad_norm": 0.47223639219621605, "learning_rate": 0.003, "loss": 4.492, "step": 1012 }, { "epoch": 0.01013, "grad_norm": 0.4611333961201937, "learning_rate": 0.003, "loss": 4.5135, "step": 1013 }, { "epoch": 0.01014, "grad_norm": 0.36636268932008986, "learning_rate": 0.003, "loss": 4.4528, "step": 1014 }, { "epoch": 0.01015, "grad_norm": 0.4484711002206511, "learning_rate": 0.003, "loss": 4.4948, "step": 1015 }, { "epoch": 0.01016, "grad_norm": 0.553209882568818, "learning_rate": 0.003, "loss": 4.5082, "step": 1016 }, { "epoch": 0.01017, "grad_norm": 0.8898002239967567, "learning_rate": 0.003, "loss": 4.4579, "step": 1017 }, { "epoch": 0.01018, "grad_norm": 0.9938726896514534, "learning_rate": 0.003, "loss": 4.5557, "step": 1018 }, { "epoch": 0.01019, "grad_norm": 0.7008138530645361, "learning_rate": 0.003, "loss": 4.5003, "step": 1019 }, { "epoch": 0.0102, "grad_norm": 1.006685411589756, "learning_rate": 0.003, "loss": 4.5244, "step": 1020 }, { "epoch": 0.01021, "grad_norm": 0.6295405897908188, "learning_rate": 0.003, "loss": 4.4915, "step": 1021 }, { "epoch": 0.01022, "grad_norm": 0.6613412501940705, "learning_rate": 0.003, "loss": 4.5063, "step": 1022 }, { "epoch": 0.01023, "grad_norm": 0.6284055579476905, "learning_rate": 0.003, "loss": 4.4914, "step": 1023 }, { "epoch": 0.01024, "grad_norm": 0.6735486877543648, "learning_rate": 0.003, "loss": 4.4573, "step": 1024 }, { "epoch": 0.01025, "grad_norm": 0.6253815453908939, "learning_rate": 0.003, "loss": 4.4832, "step": 1025 }, { "epoch": 0.01026, "grad_norm": 0.5932239048767913, "learning_rate": 0.003, "loss": 4.4735, "step": 1026 }, { "epoch": 0.01027, "grad_norm": 0.6318859057234201, "learning_rate": 0.003, "loss": 4.4952, "step": 1027 }, { "epoch": 0.01028, "grad_norm": 0.6588163663127304, "learning_rate": 0.003, "loss": 4.4763, "step": 1028 }, { "epoch": 0.01029, "grad_norm": 0.55721560335989, "learning_rate": 0.003, "loss": 4.4524, "step": 1029 }, { "epoch": 0.0103, "grad_norm": 0.5468199872259095, "learning_rate": 0.003, "loss": 4.4809, "step": 1030 }, { "epoch": 0.01031, "grad_norm": 0.5601179932704955, "learning_rate": 0.003, "loss": 4.4934, "step": 1031 }, { "epoch": 0.01032, "grad_norm": 0.5514443283118416, "learning_rate": 0.003, "loss": 4.464, "step": 1032 }, { "epoch": 0.01033, "grad_norm": 0.49777794102419176, "learning_rate": 0.003, "loss": 4.496, "step": 1033 }, { "epoch": 0.01034, "grad_norm": 0.5163173300909295, "learning_rate": 0.003, "loss": 4.4625, "step": 1034 }, { "epoch": 0.01035, "grad_norm": 0.4798196088286999, "learning_rate": 0.003, "loss": 4.4573, "step": 1035 }, { "epoch": 0.01036, "grad_norm": 0.3958499097440651, "learning_rate": 0.003, "loss": 4.4459, "step": 1036 }, { "epoch": 0.01037, "grad_norm": 0.3643003447603396, "learning_rate": 0.003, "loss": 4.4698, "step": 1037 }, { "epoch": 0.01038, "grad_norm": 0.40989678679265595, "learning_rate": 0.003, "loss": 4.4667, "step": 1038 }, { "epoch": 0.01039, "grad_norm": 0.3544910886173235, "learning_rate": 0.003, "loss": 4.4556, "step": 1039 }, { "epoch": 0.0104, "grad_norm": 0.3719879786284374, "learning_rate": 0.003, "loss": 4.4667, "step": 1040 }, { "epoch": 0.01041, "grad_norm": 0.3770068025965155, "learning_rate": 0.003, "loss": 4.4469, "step": 1041 }, { "epoch": 0.01042, "grad_norm": 0.4469030721524434, "learning_rate": 0.003, "loss": 4.4752, "step": 1042 }, { "epoch": 0.01043, "grad_norm": 0.7272268008417163, "learning_rate": 0.003, "loss": 4.4274, "step": 1043 }, { "epoch": 0.01044, "grad_norm": 0.8992907359513324, "learning_rate": 0.003, "loss": 4.4949, "step": 1044 }, { "epoch": 0.01045, "grad_norm": 0.8302310255488462, "learning_rate": 0.003, "loss": 4.4953, "step": 1045 }, { "epoch": 0.01046, "grad_norm": 0.9975061123002056, "learning_rate": 0.003, "loss": 4.4816, "step": 1046 }, { "epoch": 0.01047, "grad_norm": 0.8067658442263902, "learning_rate": 0.003, "loss": 4.4954, "step": 1047 }, { "epoch": 0.01048, "grad_norm": 0.7572637033913173, "learning_rate": 0.003, "loss": 4.4741, "step": 1048 }, { "epoch": 0.01049, "grad_norm": 0.8994748129621607, "learning_rate": 0.003, "loss": 4.4647, "step": 1049 }, { "epoch": 0.0105, "grad_norm": 0.7962039985766673, "learning_rate": 0.003, "loss": 4.5006, "step": 1050 }, { "epoch": 0.01051, "grad_norm": 0.8207643682537104, "learning_rate": 0.003, "loss": 4.4811, "step": 1051 }, { "epoch": 0.01052, "grad_norm": 0.7458558375660994, "learning_rate": 0.003, "loss": 4.4881, "step": 1052 }, { "epoch": 0.01053, "grad_norm": 0.6759075645027314, "learning_rate": 0.003, "loss": 4.4946, "step": 1053 }, { "epoch": 0.01054, "grad_norm": 0.6790074754239308, "learning_rate": 0.003, "loss": 4.4852, "step": 1054 }, { "epoch": 0.01055, "grad_norm": 0.6920845821894167, "learning_rate": 0.003, "loss": 4.4644, "step": 1055 }, { "epoch": 0.01056, "grad_norm": 0.5219544124128492, "learning_rate": 0.003, "loss": 4.4569, "step": 1056 }, { "epoch": 0.01057, "grad_norm": 0.6021767240952963, "learning_rate": 0.003, "loss": 4.4651, "step": 1057 }, { "epoch": 0.01058, "grad_norm": 0.5219741420101096, "learning_rate": 0.003, "loss": 4.4552, "step": 1058 }, { "epoch": 0.01059, "grad_norm": 0.509217286199775, "learning_rate": 0.003, "loss": 4.4557, "step": 1059 }, { "epoch": 0.0106, "grad_norm": 0.6119681984085901, "learning_rate": 0.003, "loss": 4.4506, "step": 1060 }, { "epoch": 0.01061, "grad_norm": 0.6018802462486371, "learning_rate": 0.003, "loss": 4.4451, "step": 1061 }, { "epoch": 0.01062, "grad_norm": 0.6072220138777784, "learning_rate": 0.003, "loss": 4.4398, "step": 1062 }, { "epoch": 0.01063, "grad_norm": 0.5793152524763621, "learning_rate": 0.003, "loss": 4.4372, "step": 1063 }, { "epoch": 0.01064, "grad_norm": 0.740973950389461, "learning_rate": 0.003, "loss": 4.4576, "step": 1064 }, { "epoch": 0.01065, "grad_norm": 0.8998946238241571, "learning_rate": 0.003, "loss": 4.4756, "step": 1065 }, { "epoch": 0.01066, "grad_norm": 0.889057289027363, "learning_rate": 0.003, "loss": 4.4727, "step": 1066 }, { "epoch": 0.01067, "grad_norm": 0.9442468447502006, "learning_rate": 0.003, "loss": 4.4902, "step": 1067 }, { "epoch": 0.01068, "grad_norm": 1.018364097076677, "learning_rate": 0.003, "loss": 4.4857, "step": 1068 }, { "epoch": 0.01069, "grad_norm": 0.9799434684826753, "learning_rate": 0.003, "loss": 4.4924, "step": 1069 }, { "epoch": 0.0107, "grad_norm": 0.8992499083099482, "learning_rate": 0.003, "loss": 4.5446, "step": 1070 }, { "epoch": 0.01071, "grad_norm": 0.8735440073637682, "learning_rate": 0.003, "loss": 4.4651, "step": 1071 }, { "epoch": 0.01072, "grad_norm": 0.8443267804596191, "learning_rate": 0.003, "loss": 4.5031, "step": 1072 }, { "epoch": 0.01073, "grad_norm": 0.7866639405001601, "learning_rate": 0.003, "loss": 4.4993, "step": 1073 }, { "epoch": 0.01074, "grad_norm": 0.7412592488158714, "learning_rate": 0.003, "loss": 4.4612, "step": 1074 }, { "epoch": 0.01075, "grad_norm": 0.7055777296919327, "learning_rate": 0.003, "loss": 4.4949, "step": 1075 }, { "epoch": 0.01076, "grad_norm": 0.6602103525167045, "learning_rate": 0.003, "loss": 4.4903, "step": 1076 }, { "epoch": 0.01077, "grad_norm": 0.6221585201435259, "learning_rate": 0.003, "loss": 4.4791, "step": 1077 }, { "epoch": 0.01078, "grad_norm": 0.6312740380808073, "learning_rate": 0.003, "loss": 4.4373, "step": 1078 }, { "epoch": 0.01079, "grad_norm": 0.5465751942660528, "learning_rate": 0.003, "loss": 4.4641, "step": 1079 }, { "epoch": 0.0108, "grad_norm": 0.5040258102852178, "learning_rate": 0.003, "loss": 4.4392, "step": 1080 }, { "epoch": 0.01081, "grad_norm": 0.4557353176092044, "learning_rate": 0.003, "loss": 4.4439, "step": 1081 }, { "epoch": 0.01082, "grad_norm": 0.43657211030445375, "learning_rate": 0.003, "loss": 4.4598, "step": 1082 }, { "epoch": 0.01083, "grad_norm": 0.37468227521199704, "learning_rate": 0.003, "loss": 4.4419, "step": 1083 }, { "epoch": 0.01084, "grad_norm": 0.3702365616744669, "learning_rate": 0.003, "loss": 4.4465, "step": 1084 }, { "epoch": 0.01085, "grad_norm": 0.3248887668649726, "learning_rate": 0.003, "loss": 4.4323, "step": 1085 }, { "epoch": 0.01086, "grad_norm": 0.28646275942866467, "learning_rate": 0.003, "loss": 4.4059, "step": 1086 }, { "epoch": 0.01087, "grad_norm": 0.3109295903469056, "learning_rate": 0.003, "loss": 4.4215, "step": 1087 }, { "epoch": 0.01088, "grad_norm": 0.31305777007935864, "learning_rate": 0.003, "loss": 4.454, "step": 1088 }, { "epoch": 0.01089, "grad_norm": 0.3202194647994524, "learning_rate": 0.003, "loss": 4.423, "step": 1089 }, { "epoch": 0.0109, "grad_norm": 0.3546167101166988, "learning_rate": 0.003, "loss": 4.4466, "step": 1090 }, { "epoch": 0.01091, "grad_norm": 0.40979643182380626, "learning_rate": 0.003, "loss": 4.4162, "step": 1091 }, { "epoch": 0.01092, "grad_norm": 0.44852044367674554, "learning_rate": 0.003, "loss": 4.4131, "step": 1092 }, { "epoch": 0.01093, "grad_norm": 0.5395428853759895, "learning_rate": 0.003, "loss": 4.4351, "step": 1093 }, { "epoch": 0.01094, "grad_norm": 0.7419472173607777, "learning_rate": 0.003, "loss": 4.4344, "step": 1094 }, { "epoch": 0.01095, "grad_norm": 1.3268112301670232, "learning_rate": 0.003, "loss": 4.4875, "step": 1095 }, { "epoch": 0.01096, "grad_norm": 0.8107867356113418, "learning_rate": 0.003, "loss": 4.4576, "step": 1096 }, { "epoch": 0.01097, "grad_norm": 0.6635063888056452, "learning_rate": 0.003, "loss": 4.4388, "step": 1097 }, { "epoch": 0.01098, "grad_norm": 0.9662363707419442, "learning_rate": 0.003, "loss": 4.474, "step": 1098 }, { "epoch": 0.01099, "grad_norm": 0.7696101380141932, "learning_rate": 0.003, "loss": 4.4405, "step": 1099 }, { "epoch": 0.011, "grad_norm": 0.7450236895824817, "learning_rate": 0.003, "loss": 4.4451, "step": 1100 }, { "epoch": 0.01101, "grad_norm": 0.7254968126029079, "learning_rate": 0.003, "loss": 4.4513, "step": 1101 }, { "epoch": 0.01102, "grad_norm": 0.7537628081039499, "learning_rate": 0.003, "loss": 4.4588, "step": 1102 }, { "epoch": 0.01103, "grad_norm": 0.7123173358553468, "learning_rate": 0.003, "loss": 4.4551, "step": 1103 }, { "epoch": 0.01104, "grad_norm": 0.6033148725235706, "learning_rate": 0.003, "loss": 4.4384, "step": 1104 }, { "epoch": 0.01105, "grad_norm": 0.7223359575565769, "learning_rate": 0.003, "loss": 4.4643, "step": 1105 }, { "epoch": 0.01106, "grad_norm": 0.6749769688136343, "learning_rate": 0.003, "loss": 4.4355, "step": 1106 }, { "epoch": 0.01107, "grad_norm": 0.6519921729558845, "learning_rate": 0.003, "loss": 4.443, "step": 1107 }, { "epoch": 0.01108, "grad_norm": 0.6442390747026363, "learning_rate": 0.003, "loss": 4.4155, "step": 1108 }, { "epoch": 0.01109, "grad_norm": 0.5210761074063659, "learning_rate": 0.003, "loss": 4.4258, "step": 1109 }, { "epoch": 0.0111, "grad_norm": 0.49112961811162964, "learning_rate": 0.003, "loss": 4.4439, "step": 1110 }, { "epoch": 0.01111, "grad_norm": 0.5154689325771941, "learning_rate": 0.003, "loss": 4.4228, "step": 1111 }, { "epoch": 0.01112, "grad_norm": 0.5147734027357755, "learning_rate": 0.003, "loss": 4.4383, "step": 1112 }, { "epoch": 0.01113, "grad_norm": 0.4905886669705696, "learning_rate": 0.003, "loss": 4.4046, "step": 1113 }, { "epoch": 0.01114, "grad_norm": 0.47555859430135783, "learning_rate": 0.003, "loss": 4.4166, "step": 1114 }, { "epoch": 0.01115, "grad_norm": 0.42768329796719207, "learning_rate": 0.003, "loss": 4.4369, "step": 1115 }, { "epoch": 0.01116, "grad_norm": 0.4281709726428523, "learning_rate": 0.003, "loss": 4.4286, "step": 1116 }, { "epoch": 0.01117, "grad_norm": 0.4124788023594352, "learning_rate": 0.003, "loss": 4.414, "step": 1117 }, { "epoch": 0.01118, "grad_norm": 0.4531459371875401, "learning_rate": 0.003, "loss": 4.415, "step": 1118 }, { "epoch": 0.01119, "grad_norm": 0.5115410769007107, "learning_rate": 0.003, "loss": 4.4247, "step": 1119 }, { "epoch": 0.0112, "grad_norm": 0.6598099600325713, "learning_rate": 0.003, "loss": 4.4115, "step": 1120 }, { "epoch": 0.01121, "grad_norm": 0.7298362819040065, "learning_rate": 0.003, "loss": 4.4356, "step": 1121 }, { "epoch": 0.01122, "grad_norm": 0.6383918217981295, "learning_rate": 0.003, "loss": 4.4021, "step": 1122 }, { "epoch": 0.01123, "grad_norm": 0.7157274101955465, "learning_rate": 0.003, "loss": 4.3942, "step": 1123 }, { "epoch": 0.01124, "grad_norm": 0.6763302773515354, "learning_rate": 0.003, "loss": 4.4444, "step": 1124 }, { "epoch": 0.01125, "grad_norm": 0.5890253028490829, "learning_rate": 0.003, "loss": 4.4062, "step": 1125 }, { "epoch": 0.01126, "grad_norm": 0.6745452077137704, "learning_rate": 0.003, "loss": 4.4204, "step": 1126 }, { "epoch": 0.01127, "grad_norm": 0.6212265837956056, "learning_rate": 0.003, "loss": 4.446, "step": 1127 }, { "epoch": 0.01128, "grad_norm": 0.6579375902442356, "learning_rate": 0.003, "loss": 4.4285, "step": 1128 }, { "epoch": 0.01129, "grad_norm": 0.7038241088934594, "learning_rate": 0.003, "loss": 4.4197, "step": 1129 }, { "epoch": 0.0113, "grad_norm": 0.8537912734504166, "learning_rate": 0.003, "loss": 4.4099, "step": 1130 }, { "epoch": 0.01131, "grad_norm": 0.6676066708341881, "learning_rate": 0.003, "loss": 4.4184, "step": 1131 }, { "epoch": 0.01132, "grad_norm": 0.6262944206433371, "learning_rate": 0.003, "loss": 4.4418, "step": 1132 }, { "epoch": 0.01133, "grad_norm": 0.7096964978520276, "learning_rate": 0.003, "loss": 4.4077, "step": 1133 }, { "epoch": 0.01134, "grad_norm": 0.7063046600590702, "learning_rate": 0.003, "loss": 4.4219, "step": 1134 }, { "epoch": 0.01135, "grad_norm": 0.7389408975678705, "learning_rate": 0.003, "loss": 4.4511, "step": 1135 }, { "epoch": 0.01136, "grad_norm": 0.902932943298548, "learning_rate": 0.003, "loss": 4.4334, "step": 1136 }, { "epoch": 0.01137, "grad_norm": 0.8104098050855381, "learning_rate": 0.003, "loss": 4.4615, "step": 1137 }, { "epoch": 0.01138, "grad_norm": 0.6304382373186352, "learning_rate": 0.003, "loss": 4.4061, "step": 1138 }, { "epoch": 0.01139, "grad_norm": 0.6573985975094263, "learning_rate": 0.003, "loss": 4.4174, "step": 1139 }, { "epoch": 0.0114, "grad_norm": 0.6243548176788066, "learning_rate": 0.003, "loss": 4.4015, "step": 1140 }, { "epoch": 0.01141, "grad_norm": 0.6941761654842915, "learning_rate": 0.003, "loss": 4.4278, "step": 1141 }, { "epoch": 0.01142, "grad_norm": 0.6422841400280502, "learning_rate": 0.003, "loss": 4.4042, "step": 1142 }, { "epoch": 0.01143, "grad_norm": 0.6274623913648985, "learning_rate": 0.003, "loss": 4.4193, "step": 1143 }, { "epoch": 0.01144, "grad_norm": 0.6449672978265372, "learning_rate": 0.003, "loss": 4.3861, "step": 1144 }, { "epoch": 0.01145, "grad_norm": 0.8094555842673681, "learning_rate": 0.003, "loss": 4.4312, "step": 1145 }, { "epoch": 0.01146, "grad_norm": 0.8440842425424007, "learning_rate": 0.003, "loss": 4.4247, "step": 1146 }, { "epoch": 0.01147, "grad_norm": 0.8250289231813209, "learning_rate": 0.003, "loss": 4.4121, "step": 1147 }, { "epoch": 0.01148, "grad_norm": 0.9842608879774175, "learning_rate": 0.003, "loss": 4.4248, "step": 1148 }, { "epoch": 0.01149, "grad_norm": 1.140890319957312, "learning_rate": 0.003, "loss": 4.4515, "step": 1149 }, { "epoch": 0.0115, "grad_norm": 1.0049560913231448, "learning_rate": 0.003, "loss": 4.454, "step": 1150 }, { "epoch": 0.01151, "grad_norm": 0.9241096807179571, "learning_rate": 0.003, "loss": 4.4438, "step": 1151 }, { "epoch": 0.01152, "grad_norm": 0.8197935874101315, "learning_rate": 0.003, "loss": 4.4496, "step": 1152 }, { "epoch": 0.01153, "grad_norm": 0.8964418195355868, "learning_rate": 0.003, "loss": 4.4768, "step": 1153 }, { "epoch": 0.01154, "grad_norm": 0.7052475574958695, "learning_rate": 0.003, "loss": 4.4381, "step": 1154 }, { "epoch": 0.01155, "grad_norm": 0.6994161040159336, "learning_rate": 0.003, "loss": 4.425, "step": 1155 }, { "epoch": 0.01156, "grad_norm": 0.5866991021029688, "learning_rate": 0.003, "loss": 4.4267, "step": 1156 }, { "epoch": 0.01157, "grad_norm": 0.5890541796764595, "learning_rate": 0.003, "loss": 4.4503, "step": 1157 }, { "epoch": 0.01158, "grad_norm": 0.5558417103751193, "learning_rate": 0.003, "loss": 4.4146, "step": 1158 }, { "epoch": 0.01159, "grad_norm": 0.5494688252527891, "learning_rate": 0.003, "loss": 4.41, "step": 1159 }, { "epoch": 0.0116, "grad_norm": 0.5721755744732883, "learning_rate": 0.003, "loss": 4.4228, "step": 1160 }, { "epoch": 0.01161, "grad_norm": 0.6717218566295324, "learning_rate": 0.003, "loss": 4.4019, "step": 1161 }, { "epoch": 0.01162, "grad_norm": 0.7268973886687413, "learning_rate": 0.003, "loss": 4.4138, "step": 1162 }, { "epoch": 0.01163, "grad_norm": 0.5842283691456034, "learning_rate": 0.003, "loss": 4.4284, "step": 1163 }, { "epoch": 0.01164, "grad_norm": 0.4855856513804374, "learning_rate": 0.003, "loss": 4.4134, "step": 1164 }, { "epoch": 0.01165, "grad_norm": 0.4188604676147223, "learning_rate": 0.003, "loss": 4.4035, "step": 1165 }, { "epoch": 0.01166, "grad_norm": 0.3540156009620338, "learning_rate": 0.003, "loss": 4.4019, "step": 1166 }, { "epoch": 0.01167, "grad_norm": 0.34211903868248933, "learning_rate": 0.003, "loss": 4.3663, "step": 1167 }, { "epoch": 0.01168, "grad_norm": 0.3891962127459521, "learning_rate": 0.003, "loss": 4.374, "step": 1168 }, { "epoch": 0.01169, "grad_norm": 0.43062143986497, "learning_rate": 0.003, "loss": 4.4147, "step": 1169 }, { "epoch": 0.0117, "grad_norm": 0.5082945055708723, "learning_rate": 0.003, "loss": 4.404, "step": 1170 }, { "epoch": 0.01171, "grad_norm": 0.6470102757597544, "learning_rate": 0.003, "loss": 4.3885, "step": 1171 }, { "epoch": 0.01172, "grad_norm": 0.7362820078056241, "learning_rate": 0.003, "loss": 4.3869, "step": 1172 }, { "epoch": 0.01173, "grad_norm": 0.693269388291317, "learning_rate": 0.003, "loss": 4.4171, "step": 1173 }, { "epoch": 0.01174, "grad_norm": 0.5646769911878592, "learning_rate": 0.003, "loss": 4.3891, "step": 1174 }, { "epoch": 0.01175, "grad_norm": 0.5675605646992437, "learning_rate": 0.003, "loss": 4.3751, "step": 1175 }, { "epoch": 0.01176, "grad_norm": 0.5874888204753396, "learning_rate": 0.003, "loss": 4.4094, "step": 1176 }, { "epoch": 0.01177, "grad_norm": 0.4402531057426568, "learning_rate": 0.003, "loss": 4.3711, "step": 1177 }, { "epoch": 0.01178, "grad_norm": 0.542910388266956, "learning_rate": 0.003, "loss": 4.3959, "step": 1178 }, { "epoch": 0.01179, "grad_norm": 0.6077990883816204, "learning_rate": 0.003, "loss": 4.3751, "step": 1179 }, { "epoch": 0.0118, "grad_norm": 0.5935696360755632, "learning_rate": 0.003, "loss": 4.4393, "step": 1180 }, { "epoch": 0.01181, "grad_norm": 0.5077497618150401, "learning_rate": 0.003, "loss": 4.4227, "step": 1181 }, { "epoch": 0.01182, "grad_norm": 0.4525800009003507, "learning_rate": 0.003, "loss": 4.3791, "step": 1182 }, { "epoch": 0.01183, "grad_norm": 0.465730915302176, "learning_rate": 0.003, "loss": 4.3741, "step": 1183 }, { "epoch": 0.01184, "grad_norm": 0.5691375544130988, "learning_rate": 0.003, "loss": 4.4172, "step": 1184 }, { "epoch": 0.01185, "grad_norm": 0.6220094109188331, "learning_rate": 0.003, "loss": 4.3896, "step": 1185 }, { "epoch": 0.01186, "grad_norm": 0.7180162404965751, "learning_rate": 0.003, "loss": 4.417, "step": 1186 }, { "epoch": 0.01187, "grad_norm": 0.7918251317294935, "learning_rate": 0.003, "loss": 4.389, "step": 1187 }, { "epoch": 0.01188, "grad_norm": 0.8521642061482538, "learning_rate": 0.003, "loss": 4.4081, "step": 1188 }, { "epoch": 0.01189, "grad_norm": 0.884743262111852, "learning_rate": 0.003, "loss": 4.4173, "step": 1189 }, { "epoch": 0.0119, "grad_norm": 0.809315268231435, "learning_rate": 0.003, "loss": 4.3961, "step": 1190 }, { "epoch": 0.01191, "grad_norm": 0.8426026711506704, "learning_rate": 0.003, "loss": 4.3805, "step": 1191 }, { "epoch": 0.01192, "grad_norm": 0.8039017391990436, "learning_rate": 0.003, "loss": 4.4134, "step": 1192 }, { "epoch": 0.01193, "grad_norm": 1.0091767993853729, "learning_rate": 0.003, "loss": 4.4278, "step": 1193 }, { "epoch": 0.01194, "grad_norm": 1.021446612913599, "learning_rate": 0.003, "loss": 4.4458, "step": 1194 }, { "epoch": 0.01195, "grad_norm": 0.8197186317539255, "learning_rate": 0.003, "loss": 4.4136, "step": 1195 }, { "epoch": 0.01196, "grad_norm": 1.0663075325190898, "learning_rate": 0.003, "loss": 4.4346, "step": 1196 }, { "epoch": 0.01197, "grad_norm": 1.1163749964317249, "learning_rate": 0.003, "loss": 4.42, "step": 1197 }, { "epoch": 0.01198, "grad_norm": 0.7911818503418244, "learning_rate": 0.003, "loss": 4.4756, "step": 1198 }, { "epoch": 0.01199, "grad_norm": 0.7094954502714929, "learning_rate": 0.003, "loss": 4.4429, "step": 1199 }, { "epoch": 0.012, "grad_norm": 0.6371349856097592, "learning_rate": 0.003, "loss": 4.4311, "step": 1200 }, { "epoch": 0.01201, "grad_norm": 0.6670059038710348, "learning_rate": 0.003, "loss": 4.4586, "step": 1201 }, { "epoch": 0.01202, "grad_norm": 0.7057344621036731, "learning_rate": 0.003, "loss": 4.4253, "step": 1202 }, { "epoch": 0.01203, "grad_norm": 0.6813638781778574, "learning_rate": 0.003, "loss": 4.4314, "step": 1203 }, { "epoch": 0.01204, "grad_norm": 0.6487927748495244, "learning_rate": 0.003, "loss": 4.4252, "step": 1204 }, { "epoch": 0.01205, "grad_norm": 0.5702088035511471, "learning_rate": 0.003, "loss": 4.4134, "step": 1205 }, { "epoch": 0.01206, "grad_norm": 0.5185412999807271, "learning_rate": 0.003, "loss": 4.4527, "step": 1206 }, { "epoch": 0.01207, "grad_norm": 0.5529206635637681, "learning_rate": 0.003, "loss": 4.3748, "step": 1207 }, { "epoch": 0.01208, "grad_norm": 0.6027594396835911, "learning_rate": 0.003, "loss": 4.4355, "step": 1208 }, { "epoch": 0.01209, "grad_norm": 0.6294683615242583, "learning_rate": 0.003, "loss": 4.4139, "step": 1209 }, { "epoch": 0.0121, "grad_norm": 0.5634425369332556, "learning_rate": 0.003, "loss": 4.4142, "step": 1210 }, { "epoch": 0.01211, "grad_norm": 0.489983773731107, "learning_rate": 0.003, "loss": 4.3953, "step": 1211 }, { "epoch": 0.01212, "grad_norm": 0.48590408635878257, "learning_rate": 0.003, "loss": 4.4163, "step": 1212 }, { "epoch": 0.01213, "grad_norm": 0.4654031475329597, "learning_rate": 0.003, "loss": 4.3921, "step": 1213 }, { "epoch": 0.01214, "grad_norm": 0.4519325610344299, "learning_rate": 0.003, "loss": 4.3755, "step": 1214 }, { "epoch": 0.01215, "grad_norm": 0.4986453835908023, "learning_rate": 0.003, "loss": 4.3833, "step": 1215 }, { "epoch": 0.01216, "grad_norm": 0.6836671205807361, "learning_rate": 0.003, "loss": 4.3816, "step": 1216 }, { "epoch": 0.01217, "grad_norm": 0.8634560994181636, "learning_rate": 0.003, "loss": 4.4035, "step": 1217 }, { "epoch": 0.01218, "grad_norm": 0.7430166358012542, "learning_rate": 0.003, "loss": 4.3918, "step": 1218 }, { "epoch": 0.01219, "grad_norm": 0.5693399114743081, "learning_rate": 0.003, "loss": 4.3834, "step": 1219 }, { "epoch": 0.0122, "grad_norm": 0.6203041570130704, "learning_rate": 0.003, "loss": 4.3803, "step": 1220 }, { "epoch": 0.01221, "grad_norm": 0.6221216866599081, "learning_rate": 0.003, "loss": 4.3932, "step": 1221 }, { "epoch": 0.01222, "grad_norm": 0.5678017035835031, "learning_rate": 0.003, "loss": 4.3744, "step": 1222 }, { "epoch": 0.01223, "grad_norm": 0.4544260730538049, "learning_rate": 0.003, "loss": 4.3669, "step": 1223 }, { "epoch": 0.01224, "grad_norm": 0.4498657047747612, "learning_rate": 0.003, "loss": 4.3528, "step": 1224 }, { "epoch": 0.01225, "grad_norm": 0.44954702929076595, "learning_rate": 0.003, "loss": 4.3751, "step": 1225 }, { "epoch": 0.01226, "grad_norm": 0.45922987677597427, "learning_rate": 0.003, "loss": 4.3678, "step": 1226 }, { "epoch": 0.01227, "grad_norm": 0.444736072897216, "learning_rate": 0.003, "loss": 4.3955, "step": 1227 }, { "epoch": 0.01228, "grad_norm": 0.39964234776344315, "learning_rate": 0.003, "loss": 4.3749, "step": 1228 }, { "epoch": 0.01229, "grad_norm": 0.4304709469798317, "learning_rate": 0.003, "loss": 4.3636, "step": 1229 }, { "epoch": 0.0123, "grad_norm": 0.45096371519587236, "learning_rate": 0.003, "loss": 4.3946, "step": 1230 }, { "epoch": 0.01231, "grad_norm": 0.45260016324469293, "learning_rate": 0.003, "loss": 4.3662, "step": 1231 }, { "epoch": 0.01232, "grad_norm": 0.5056071407476121, "learning_rate": 0.003, "loss": 4.3416, "step": 1232 }, { "epoch": 0.01233, "grad_norm": 0.6137961234604853, "learning_rate": 0.003, "loss": 4.3562, "step": 1233 }, { "epoch": 0.01234, "grad_norm": 0.7580924108092045, "learning_rate": 0.003, "loss": 4.3771, "step": 1234 }, { "epoch": 0.01235, "grad_norm": 0.7682739117958468, "learning_rate": 0.003, "loss": 4.377, "step": 1235 }, { "epoch": 0.01236, "grad_norm": 0.7726095308103765, "learning_rate": 0.003, "loss": 4.379, "step": 1236 }, { "epoch": 0.01237, "grad_norm": 0.6515520396776402, "learning_rate": 0.003, "loss": 4.3868, "step": 1237 }, { "epoch": 0.01238, "grad_norm": 0.5911401797128396, "learning_rate": 0.003, "loss": 4.3811, "step": 1238 }, { "epoch": 0.01239, "grad_norm": 0.6868743136698521, "learning_rate": 0.003, "loss": 4.3689, "step": 1239 }, { "epoch": 0.0124, "grad_norm": 0.7235342138686123, "learning_rate": 0.003, "loss": 4.3578, "step": 1240 }, { "epoch": 0.01241, "grad_norm": 0.7086323715527123, "learning_rate": 0.003, "loss": 4.3714, "step": 1241 }, { "epoch": 0.01242, "grad_norm": 0.7273279516261576, "learning_rate": 0.003, "loss": 4.3772, "step": 1242 }, { "epoch": 0.01243, "grad_norm": 0.7241329186524991, "learning_rate": 0.003, "loss": 4.358, "step": 1243 }, { "epoch": 0.01244, "grad_norm": 0.659589685877625, "learning_rate": 0.003, "loss": 4.3697, "step": 1244 }, { "epoch": 0.01245, "grad_norm": 0.5679488489092839, "learning_rate": 0.003, "loss": 4.3588, "step": 1245 }, { "epoch": 0.01246, "grad_norm": 0.5843246373122641, "learning_rate": 0.003, "loss": 4.3356, "step": 1246 }, { "epoch": 0.01247, "grad_norm": 0.5386207185407424, "learning_rate": 0.003, "loss": 4.3884, "step": 1247 }, { "epoch": 0.01248, "grad_norm": 0.4947062329390487, "learning_rate": 0.003, "loss": 4.3718, "step": 1248 }, { "epoch": 0.01249, "grad_norm": 0.47539230004779465, "learning_rate": 0.003, "loss": 4.3597, "step": 1249 }, { "epoch": 0.0125, "grad_norm": 0.4876155686681362, "learning_rate": 0.003, "loss": 4.3353, "step": 1250 }, { "epoch": 0.01251, "grad_norm": 0.587984128950104, "learning_rate": 0.003, "loss": 4.3725, "step": 1251 }, { "epoch": 0.01252, "grad_norm": 0.74609447185642, "learning_rate": 0.003, "loss": 4.3585, "step": 1252 }, { "epoch": 0.01253, "grad_norm": 0.8297104750282515, "learning_rate": 0.003, "loss": 4.3426, "step": 1253 }, { "epoch": 0.01254, "grad_norm": 0.6714602053035584, "learning_rate": 0.003, "loss": 4.3879, "step": 1254 }, { "epoch": 0.01255, "grad_norm": 0.5575284697102026, "learning_rate": 0.003, "loss": 4.3328, "step": 1255 }, { "epoch": 0.01256, "grad_norm": 0.6531801875316168, "learning_rate": 0.003, "loss": 4.3617, "step": 1256 }, { "epoch": 0.01257, "grad_norm": 0.6725833064810157, "learning_rate": 0.003, "loss": 4.3523, "step": 1257 }, { "epoch": 0.01258, "grad_norm": 0.5415379584452896, "learning_rate": 0.003, "loss": 4.3533, "step": 1258 }, { "epoch": 0.01259, "grad_norm": 0.6189096611706483, "learning_rate": 0.003, "loss": 4.3565, "step": 1259 }, { "epoch": 0.0126, "grad_norm": 0.7719709473721972, "learning_rate": 0.003, "loss": 4.3943, "step": 1260 }, { "epoch": 0.01261, "grad_norm": 0.7141309260632757, "learning_rate": 0.003, "loss": 4.3828, "step": 1261 }, { "epoch": 0.01262, "grad_norm": 0.82201580215985, "learning_rate": 0.003, "loss": 4.3803, "step": 1262 }, { "epoch": 0.01263, "grad_norm": 0.7648394097891381, "learning_rate": 0.003, "loss": 4.3764, "step": 1263 }, { "epoch": 0.01264, "grad_norm": 0.6534936022081808, "learning_rate": 0.003, "loss": 4.3615, "step": 1264 }, { "epoch": 0.01265, "grad_norm": 0.8083064566897021, "learning_rate": 0.003, "loss": 4.3795, "step": 1265 }, { "epoch": 0.01266, "grad_norm": 0.7625627444580204, "learning_rate": 0.003, "loss": 4.3817, "step": 1266 }, { "epoch": 0.01267, "grad_norm": 0.7144669763786441, "learning_rate": 0.003, "loss": 4.3961, "step": 1267 }, { "epoch": 0.01268, "grad_norm": 0.6176763363932604, "learning_rate": 0.003, "loss": 4.3815, "step": 1268 }, { "epoch": 0.01269, "grad_norm": 0.5844219741679575, "learning_rate": 0.003, "loss": 4.3442, "step": 1269 }, { "epoch": 0.0127, "grad_norm": 0.5570920646541462, "learning_rate": 0.003, "loss": 4.3764, "step": 1270 }, { "epoch": 0.01271, "grad_norm": 0.665679672510672, "learning_rate": 0.003, "loss": 4.3628, "step": 1271 }, { "epoch": 0.01272, "grad_norm": 0.7700987517359916, "learning_rate": 0.003, "loss": 4.3638, "step": 1272 }, { "epoch": 0.01273, "grad_norm": 0.7473118276250652, "learning_rate": 0.003, "loss": 4.3932, "step": 1273 }, { "epoch": 0.01274, "grad_norm": 0.6491851354934152, "learning_rate": 0.003, "loss": 4.3789, "step": 1274 }, { "epoch": 0.01275, "grad_norm": 0.6744635149292406, "learning_rate": 0.003, "loss": 4.3494, "step": 1275 }, { "epoch": 0.01276, "grad_norm": 0.652401206080157, "learning_rate": 0.003, "loss": 4.3572, "step": 1276 }, { "epoch": 0.01277, "grad_norm": 0.8226392169345714, "learning_rate": 0.003, "loss": 4.3706, "step": 1277 }, { "epoch": 0.01278, "grad_norm": 0.8325255271008436, "learning_rate": 0.003, "loss": 4.3906, "step": 1278 }, { "epoch": 0.01279, "grad_norm": 0.7029576010205191, "learning_rate": 0.003, "loss": 4.3612, "step": 1279 }, { "epoch": 0.0128, "grad_norm": 0.6056600803694466, "learning_rate": 0.003, "loss": 4.3548, "step": 1280 }, { "epoch": 0.01281, "grad_norm": 0.6068824045406035, "learning_rate": 0.003, "loss": 4.3433, "step": 1281 }, { "epoch": 0.01282, "grad_norm": 0.5449477323585487, "learning_rate": 0.003, "loss": 4.3801, "step": 1282 }, { "epoch": 0.01283, "grad_norm": 0.5086376593278678, "learning_rate": 0.003, "loss": 4.3465, "step": 1283 }, { "epoch": 0.01284, "grad_norm": 0.49507526795006285, "learning_rate": 0.003, "loss": 4.3484, "step": 1284 }, { "epoch": 0.01285, "grad_norm": 0.4794063886412805, "learning_rate": 0.003, "loss": 4.3499, "step": 1285 }, { "epoch": 0.01286, "grad_norm": 0.4314216967223394, "learning_rate": 0.003, "loss": 4.37, "step": 1286 }, { "epoch": 0.01287, "grad_norm": 0.43751633400555473, "learning_rate": 0.003, "loss": 4.3355, "step": 1287 }, { "epoch": 0.01288, "grad_norm": 0.4402964980769675, "learning_rate": 0.003, "loss": 4.3477, "step": 1288 }, { "epoch": 0.01289, "grad_norm": 0.4330370123851316, "learning_rate": 0.003, "loss": 4.34, "step": 1289 }, { "epoch": 0.0129, "grad_norm": 0.4577360505134826, "learning_rate": 0.003, "loss": 4.3613, "step": 1290 }, { "epoch": 0.01291, "grad_norm": 0.4908761222350223, "learning_rate": 0.003, "loss": 4.3434, "step": 1291 }, { "epoch": 0.01292, "grad_norm": 0.5197962656056466, "learning_rate": 0.003, "loss": 4.3769, "step": 1292 }, { "epoch": 0.01293, "grad_norm": 0.5249407732060195, "learning_rate": 0.003, "loss": 4.3636, "step": 1293 }, { "epoch": 0.01294, "grad_norm": 0.611698986416238, "learning_rate": 0.003, "loss": 4.3337, "step": 1294 }, { "epoch": 0.01295, "grad_norm": 0.7156402524759599, "learning_rate": 0.003, "loss": 4.369, "step": 1295 }, { "epoch": 0.01296, "grad_norm": 0.8400871327514794, "learning_rate": 0.003, "loss": 4.3429, "step": 1296 }, { "epoch": 0.01297, "grad_norm": 0.9437097734689512, "learning_rate": 0.003, "loss": 4.3707, "step": 1297 }, { "epoch": 0.01298, "grad_norm": 0.9584374674542437, "learning_rate": 0.003, "loss": 4.3505, "step": 1298 }, { "epoch": 0.01299, "grad_norm": 0.8671660761915297, "learning_rate": 0.003, "loss": 4.3625, "step": 1299 }, { "epoch": 0.013, "grad_norm": 0.7502528073695865, "learning_rate": 0.003, "loss": 4.3495, "step": 1300 }, { "epoch": 0.01301, "grad_norm": 0.7063515582852367, "learning_rate": 0.003, "loss": 4.3619, "step": 1301 }, { "epoch": 0.01302, "grad_norm": 0.6149436229664508, "learning_rate": 0.003, "loss": 4.4034, "step": 1302 }, { "epoch": 0.01303, "grad_norm": 0.6398205689943912, "learning_rate": 0.003, "loss": 4.3511, "step": 1303 }, { "epoch": 0.01304, "grad_norm": 0.5125654671319883, "learning_rate": 0.003, "loss": 4.3743, "step": 1304 }, { "epoch": 0.01305, "grad_norm": 0.49151327704885306, "learning_rate": 0.003, "loss": 4.357, "step": 1305 }, { "epoch": 0.01306, "grad_norm": 0.449035259984806, "learning_rate": 0.003, "loss": 4.3496, "step": 1306 }, { "epoch": 0.01307, "grad_norm": 0.3786564732477656, "learning_rate": 0.003, "loss": 4.365, "step": 1307 }, { "epoch": 0.01308, "grad_norm": 0.40747339154306583, "learning_rate": 0.003, "loss": 4.3407, "step": 1308 }, { "epoch": 0.01309, "grad_norm": 0.3477777170477395, "learning_rate": 0.003, "loss": 4.3771, "step": 1309 }, { "epoch": 0.0131, "grad_norm": 0.33985557007437, "learning_rate": 0.003, "loss": 4.3221, "step": 1310 }, { "epoch": 0.01311, "grad_norm": 0.3781744690142208, "learning_rate": 0.003, "loss": 4.3621, "step": 1311 }, { "epoch": 0.01312, "grad_norm": 0.390430095045798, "learning_rate": 0.003, "loss": 4.3295, "step": 1312 }, { "epoch": 0.01313, "grad_norm": 0.4411339748954226, "learning_rate": 0.003, "loss": 4.3219, "step": 1313 }, { "epoch": 0.01314, "grad_norm": 0.4398621851217481, "learning_rate": 0.003, "loss": 4.3182, "step": 1314 }, { "epoch": 0.01315, "grad_norm": 0.4543520117536255, "learning_rate": 0.003, "loss": 4.3454, "step": 1315 }, { "epoch": 0.01316, "grad_norm": 0.4647119001484631, "learning_rate": 0.003, "loss": 4.3315, "step": 1316 }, { "epoch": 0.01317, "grad_norm": 0.6634726948283797, "learning_rate": 0.003, "loss": 4.3607, "step": 1317 }, { "epoch": 0.01318, "grad_norm": 1.0496539085022096, "learning_rate": 0.003, "loss": 4.3666, "step": 1318 }, { "epoch": 0.01319, "grad_norm": 1.3165114729988252, "learning_rate": 0.003, "loss": 4.392, "step": 1319 }, { "epoch": 0.0132, "grad_norm": 0.7650097283623507, "learning_rate": 0.003, "loss": 4.3723, "step": 1320 }, { "epoch": 0.01321, "grad_norm": 0.7692901736546863, "learning_rate": 0.003, "loss": 4.3563, "step": 1321 }, { "epoch": 0.01322, "grad_norm": 0.8430579356733313, "learning_rate": 0.003, "loss": 4.3811, "step": 1322 }, { "epoch": 0.01323, "grad_norm": 0.7787983772827455, "learning_rate": 0.003, "loss": 4.3749, "step": 1323 }, { "epoch": 0.01324, "grad_norm": 0.7585640630062677, "learning_rate": 0.003, "loss": 4.3595, "step": 1324 }, { "epoch": 0.01325, "grad_norm": 0.662871102926864, "learning_rate": 0.003, "loss": 4.3604, "step": 1325 }, { "epoch": 0.01326, "grad_norm": 0.6552542644296041, "learning_rate": 0.003, "loss": 4.3471, "step": 1326 }, { "epoch": 0.01327, "grad_norm": 0.6597442221568894, "learning_rate": 0.003, "loss": 4.3636, "step": 1327 }, { "epoch": 0.01328, "grad_norm": 0.6991774516177431, "learning_rate": 0.003, "loss": 4.3572, "step": 1328 }, { "epoch": 0.01329, "grad_norm": 0.7004568547673504, "learning_rate": 0.003, "loss": 4.323, "step": 1329 }, { "epoch": 0.0133, "grad_norm": 0.6451796491929548, "learning_rate": 0.003, "loss": 4.3619, "step": 1330 }, { "epoch": 0.01331, "grad_norm": 0.5603586741834496, "learning_rate": 0.003, "loss": 4.3461, "step": 1331 }, { "epoch": 0.01332, "grad_norm": 0.6118296137650349, "learning_rate": 0.003, "loss": 4.3674, "step": 1332 }, { "epoch": 0.01333, "grad_norm": 0.6556746462798643, "learning_rate": 0.003, "loss": 4.3385, "step": 1333 }, { "epoch": 0.01334, "grad_norm": 0.6569171248898434, "learning_rate": 0.003, "loss": 4.3292, "step": 1334 }, { "epoch": 0.01335, "grad_norm": 0.7308490257197903, "learning_rate": 0.003, "loss": 4.355, "step": 1335 }, { "epoch": 0.01336, "grad_norm": 0.7694263760249205, "learning_rate": 0.003, "loss": 4.3562, "step": 1336 }, { "epoch": 0.01337, "grad_norm": 0.829484027358136, "learning_rate": 0.003, "loss": 4.3739, "step": 1337 }, { "epoch": 0.01338, "grad_norm": 0.7394720058707402, "learning_rate": 0.003, "loss": 4.3375, "step": 1338 }, { "epoch": 0.01339, "grad_norm": 0.7663877331658486, "learning_rate": 0.003, "loss": 4.3614, "step": 1339 }, { "epoch": 0.0134, "grad_norm": 0.6814802672525545, "learning_rate": 0.003, "loss": 4.3431, "step": 1340 }, { "epoch": 0.01341, "grad_norm": 0.5804799698607723, "learning_rate": 0.003, "loss": 4.3566, "step": 1341 }, { "epoch": 0.01342, "grad_norm": 0.5489998904660267, "learning_rate": 0.003, "loss": 4.3242, "step": 1342 }, { "epoch": 0.01343, "grad_norm": 0.5528070757921824, "learning_rate": 0.003, "loss": 4.3308, "step": 1343 }, { "epoch": 0.01344, "grad_norm": 0.5249427916401641, "learning_rate": 0.003, "loss": 4.3286, "step": 1344 }, { "epoch": 0.01345, "grad_norm": 0.5796492875823627, "learning_rate": 0.003, "loss": 4.3698, "step": 1345 }, { "epoch": 0.01346, "grad_norm": 0.7791779206443923, "learning_rate": 0.003, "loss": 4.349, "step": 1346 }, { "epoch": 0.01347, "grad_norm": 0.9787862373774305, "learning_rate": 0.003, "loss": 4.3601, "step": 1347 }, { "epoch": 0.01348, "grad_norm": 1.124691751963475, "learning_rate": 0.003, "loss": 4.3695, "step": 1348 }, { "epoch": 0.01349, "grad_norm": 0.7239534112442599, "learning_rate": 0.003, "loss": 4.3703, "step": 1349 }, { "epoch": 0.0135, "grad_norm": 0.7058824966707771, "learning_rate": 0.003, "loss": 4.3665, "step": 1350 }, { "epoch": 0.01351, "grad_norm": 0.7269767228946674, "learning_rate": 0.003, "loss": 4.3913, "step": 1351 }, { "epoch": 0.01352, "grad_norm": 0.629762174967035, "learning_rate": 0.003, "loss": 4.3652, "step": 1352 }, { "epoch": 0.01353, "grad_norm": 0.46709847900974316, "learning_rate": 0.003, "loss": 4.3426, "step": 1353 }, { "epoch": 0.01354, "grad_norm": 0.4348570974243878, "learning_rate": 0.003, "loss": 4.33, "step": 1354 }, { "epoch": 0.01355, "grad_norm": 0.4058626528377844, "learning_rate": 0.003, "loss": 4.3397, "step": 1355 }, { "epoch": 0.01356, "grad_norm": 0.4077904728688183, "learning_rate": 0.003, "loss": 4.3329, "step": 1356 }, { "epoch": 0.01357, "grad_norm": 0.3881492908104237, "learning_rate": 0.003, "loss": 4.3196, "step": 1357 }, { "epoch": 0.01358, "grad_norm": 0.37883341535998916, "learning_rate": 0.003, "loss": 4.3135, "step": 1358 }, { "epoch": 0.01359, "grad_norm": 0.3673079033329598, "learning_rate": 0.003, "loss": 4.3515, "step": 1359 }, { "epoch": 0.0136, "grad_norm": 0.37976733712994504, "learning_rate": 0.003, "loss": 4.3177, "step": 1360 }, { "epoch": 0.01361, "grad_norm": 0.4118348378412375, "learning_rate": 0.003, "loss": 4.3313, "step": 1361 }, { "epoch": 0.01362, "grad_norm": 0.4407593683397167, "learning_rate": 0.003, "loss": 4.345, "step": 1362 }, { "epoch": 0.01363, "grad_norm": 0.5058671106167254, "learning_rate": 0.003, "loss": 4.3464, "step": 1363 }, { "epoch": 0.01364, "grad_norm": 0.6807238620694973, "learning_rate": 0.003, "loss": 4.3446, "step": 1364 }, { "epoch": 0.01365, "grad_norm": 0.8032111226209311, "learning_rate": 0.003, "loss": 4.3702, "step": 1365 }, { "epoch": 0.01366, "grad_norm": 0.7976336994683142, "learning_rate": 0.003, "loss": 4.3233, "step": 1366 }, { "epoch": 0.01367, "grad_norm": 0.7881501972797743, "learning_rate": 0.003, "loss": 4.3766, "step": 1367 }, { "epoch": 0.01368, "grad_norm": 0.6437176795201012, "learning_rate": 0.003, "loss": 4.346, "step": 1368 }, { "epoch": 0.01369, "grad_norm": 0.7544973412872852, "learning_rate": 0.003, "loss": 4.3393, "step": 1369 }, { "epoch": 0.0137, "grad_norm": 0.9164166166999201, "learning_rate": 0.003, "loss": 4.3794, "step": 1370 }, { "epoch": 0.01371, "grad_norm": 0.9303263432444765, "learning_rate": 0.003, "loss": 4.3692, "step": 1371 }, { "epoch": 0.01372, "grad_norm": 0.7939151912920627, "learning_rate": 0.003, "loss": 4.3176, "step": 1372 }, { "epoch": 0.01373, "grad_norm": 0.8088750323088586, "learning_rate": 0.003, "loss": 4.3626, "step": 1373 }, { "epoch": 0.01374, "grad_norm": 0.6721224672262793, "learning_rate": 0.003, "loss": 4.3585, "step": 1374 }, { "epoch": 0.01375, "grad_norm": 0.6771097427365759, "learning_rate": 0.003, "loss": 4.3249, "step": 1375 }, { "epoch": 0.01376, "grad_norm": 0.6762752470113543, "learning_rate": 0.003, "loss": 4.347, "step": 1376 }, { "epoch": 0.01377, "grad_norm": 0.6604690846076395, "learning_rate": 0.003, "loss": 4.3466, "step": 1377 }, { "epoch": 0.01378, "grad_norm": 0.8027631418948692, "learning_rate": 0.003, "loss": 4.3812, "step": 1378 }, { "epoch": 0.01379, "grad_norm": 0.8058991494927565, "learning_rate": 0.003, "loss": 4.3543, "step": 1379 }, { "epoch": 0.0138, "grad_norm": 0.6522516144796072, "learning_rate": 0.003, "loss": 4.3459, "step": 1380 }, { "epoch": 0.01381, "grad_norm": 0.6569912385058241, "learning_rate": 0.003, "loss": 4.3524, "step": 1381 }, { "epoch": 0.01382, "grad_norm": 0.5996275495095534, "learning_rate": 0.003, "loss": 4.3471, "step": 1382 }, { "epoch": 0.01383, "grad_norm": 0.7099037071072233, "learning_rate": 0.003, "loss": 4.3442, "step": 1383 }, { "epoch": 0.01384, "grad_norm": 0.9143332820449893, "learning_rate": 0.003, "loss": 4.3854, "step": 1384 }, { "epoch": 0.01385, "grad_norm": 1.0346171765908472, "learning_rate": 0.003, "loss": 4.3815, "step": 1385 }, { "epoch": 0.01386, "grad_norm": 0.8985740978326894, "learning_rate": 0.003, "loss": 4.3558, "step": 1386 }, { "epoch": 0.01387, "grad_norm": 0.6958974730086266, "learning_rate": 0.003, "loss": 4.335, "step": 1387 }, { "epoch": 0.01388, "grad_norm": 0.7497022428211844, "learning_rate": 0.003, "loss": 4.3735, "step": 1388 }, { "epoch": 0.01389, "grad_norm": 0.7334888360507559, "learning_rate": 0.003, "loss": 4.3541, "step": 1389 }, { "epoch": 0.0139, "grad_norm": 0.6634716599193375, "learning_rate": 0.003, "loss": 4.3533, "step": 1390 }, { "epoch": 0.01391, "grad_norm": 0.5884420459817105, "learning_rate": 0.003, "loss": 4.3483, "step": 1391 }, { "epoch": 0.01392, "grad_norm": 0.536202889592441, "learning_rate": 0.003, "loss": 4.3599, "step": 1392 }, { "epoch": 0.01393, "grad_norm": 0.5041805750347179, "learning_rate": 0.003, "loss": 4.327, "step": 1393 }, { "epoch": 0.01394, "grad_norm": 0.46187226586984687, "learning_rate": 0.003, "loss": 4.3118, "step": 1394 }, { "epoch": 0.01395, "grad_norm": 0.4642311356997612, "learning_rate": 0.003, "loss": 4.343, "step": 1395 }, { "epoch": 0.01396, "grad_norm": 0.49219234547480073, "learning_rate": 0.003, "loss": 4.3333, "step": 1396 }, { "epoch": 0.01397, "grad_norm": 0.535253115362672, "learning_rate": 0.003, "loss": 4.3471, "step": 1397 }, { "epoch": 0.01398, "grad_norm": 0.6052714387106423, "learning_rate": 0.003, "loss": 4.3267, "step": 1398 }, { "epoch": 0.01399, "grad_norm": 0.6277477924110956, "learning_rate": 0.003, "loss": 4.3504, "step": 1399 }, { "epoch": 0.014, "grad_norm": 0.6846592906181149, "learning_rate": 0.003, "loss": 4.3103, "step": 1400 }, { "epoch": 0.01401, "grad_norm": 0.8200984983857136, "learning_rate": 0.003, "loss": 4.3387, "step": 1401 }, { "epoch": 0.01402, "grad_norm": 0.8742108124137586, "learning_rate": 0.003, "loss": 4.3557, "step": 1402 }, { "epoch": 0.01403, "grad_norm": 0.7096844776193044, "learning_rate": 0.003, "loss": 4.3233, "step": 1403 }, { "epoch": 0.01404, "grad_norm": 0.5388679123577896, "learning_rate": 0.003, "loss": 4.3401, "step": 1404 }, { "epoch": 0.01405, "grad_norm": 0.4910540359683292, "learning_rate": 0.003, "loss": 4.3559, "step": 1405 }, { "epoch": 0.01406, "grad_norm": 0.5213447941957257, "learning_rate": 0.003, "loss": 4.3366, "step": 1406 }, { "epoch": 0.01407, "grad_norm": 0.555926299222398, "learning_rate": 0.003, "loss": 4.3521, "step": 1407 }, { "epoch": 0.01408, "grad_norm": 0.692327807092523, "learning_rate": 0.003, "loss": 4.3137, "step": 1408 }, { "epoch": 0.01409, "grad_norm": 0.8697281888439876, "learning_rate": 0.003, "loss": 4.3588, "step": 1409 }, { "epoch": 0.0141, "grad_norm": 0.9566955718451174, "learning_rate": 0.003, "loss": 4.3446, "step": 1410 }, { "epoch": 0.01411, "grad_norm": 0.7215244847418736, "learning_rate": 0.003, "loss": 4.3131, "step": 1411 }, { "epoch": 0.01412, "grad_norm": 0.6315141391735041, "learning_rate": 0.003, "loss": 4.3535, "step": 1412 }, { "epoch": 0.01413, "grad_norm": 0.6841381857718712, "learning_rate": 0.003, "loss": 4.342, "step": 1413 }, { "epoch": 0.01414, "grad_norm": 0.5798611042820683, "learning_rate": 0.003, "loss": 4.3235, "step": 1414 }, { "epoch": 0.01415, "grad_norm": 0.547398429036414, "learning_rate": 0.003, "loss": 4.3157, "step": 1415 }, { "epoch": 0.01416, "grad_norm": 0.532688568072748, "learning_rate": 0.003, "loss": 4.3388, "step": 1416 }, { "epoch": 0.01417, "grad_norm": 0.5474366342351658, "learning_rate": 0.003, "loss": 4.3317, "step": 1417 }, { "epoch": 0.01418, "grad_norm": 0.5499547169322332, "learning_rate": 0.003, "loss": 4.3477, "step": 1418 }, { "epoch": 0.01419, "grad_norm": 0.58527195779541, "learning_rate": 0.003, "loss": 4.3403, "step": 1419 }, { "epoch": 0.0142, "grad_norm": 0.4513568933907034, "learning_rate": 0.003, "loss": 4.3401, "step": 1420 }, { "epoch": 0.01421, "grad_norm": 0.433334289258551, "learning_rate": 0.003, "loss": 4.3072, "step": 1421 }, { "epoch": 0.01422, "grad_norm": 0.39636752625840327, "learning_rate": 0.003, "loss": 4.3095, "step": 1422 }, { "epoch": 0.01423, "grad_norm": 0.38308187149456013, "learning_rate": 0.003, "loss": 4.3141, "step": 1423 }, { "epoch": 0.01424, "grad_norm": 0.3793175762281477, "learning_rate": 0.003, "loss": 4.3274, "step": 1424 }, { "epoch": 0.01425, "grad_norm": 0.4426704955835019, "learning_rate": 0.003, "loss": 4.334, "step": 1425 }, { "epoch": 0.01426, "grad_norm": 0.5006373442027768, "learning_rate": 0.003, "loss": 4.3106, "step": 1426 }, { "epoch": 0.01427, "grad_norm": 0.654459732986379, "learning_rate": 0.003, "loss": 4.3121, "step": 1427 }, { "epoch": 0.01428, "grad_norm": 0.8067469794918197, "learning_rate": 0.003, "loss": 4.3172, "step": 1428 }, { "epoch": 0.01429, "grad_norm": 0.8729241550485854, "learning_rate": 0.003, "loss": 4.3306, "step": 1429 }, { "epoch": 0.0143, "grad_norm": 0.7278485959649278, "learning_rate": 0.003, "loss": 4.3432, "step": 1430 }, { "epoch": 0.01431, "grad_norm": 0.7004237859708793, "learning_rate": 0.003, "loss": 4.3301, "step": 1431 }, { "epoch": 0.01432, "grad_norm": 0.8171598559675952, "learning_rate": 0.003, "loss": 4.3267, "step": 1432 }, { "epoch": 0.01433, "grad_norm": 0.7196947939958487, "learning_rate": 0.003, "loss": 4.323, "step": 1433 }, { "epoch": 0.01434, "grad_norm": 0.6846858190137799, "learning_rate": 0.003, "loss": 4.3048, "step": 1434 }, { "epoch": 0.01435, "grad_norm": 0.8455490886566032, "learning_rate": 0.003, "loss": 4.3242, "step": 1435 }, { "epoch": 0.01436, "grad_norm": 0.83940902394985, "learning_rate": 0.003, "loss": 4.3453, "step": 1436 }, { "epoch": 0.01437, "grad_norm": 0.7328785180307785, "learning_rate": 0.003, "loss": 4.3547, "step": 1437 }, { "epoch": 0.01438, "grad_norm": 0.7369569320140708, "learning_rate": 0.003, "loss": 4.349, "step": 1438 }, { "epoch": 0.01439, "grad_norm": 0.7139149757788127, "learning_rate": 0.003, "loss": 4.3129, "step": 1439 }, { "epoch": 0.0144, "grad_norm": 0.7299011882512658, "learning_rate": 0.003, "loss": 4.3275, "step": 1440 }, { "epoch": 0.01441, "grad_norm": 0.670750314419178, "learning_rate": 0.003, "loss": 4.3309, "step": 1441 }, { "epoch": 0.01442, "grad_norm": 0.7691195459017623, "learning_rate": 0.003, "loss": 4.3219, "step": 1442 }, { "epoch": 0.01443, "grad_norm": 0.7471914299871487, "learning_rate": 0.003, "loss": 4.3829, "step": 1443 }, { "epoch": 0.01444, "grad_norm": 0.7410199276775529, "learning_rate": 0.003, "loss": 4.3445, "step": 1444 }, { "epoch": 0.01445, "grad_norm": 0.6515002318773166, "learning_rate": 0.003, "loss": 4.3041, "step": 1445 }, { "epoch": 0.01446, "grad_norm": 0.6781090959776701, "learning_rate": 0.003, "loss": 4.3207, "step": 1446 }, { "epoch": 0.01447, "grad_norm": 0.6643065072159584, "learning_rate": 0.003, "loss": 4.3293, "step": 1447 }, { "epoch": 0.01448, "grad_norm": 0.6532713033801402, "learning_rate": 0.003, "loss": 4.3375, "step": 1448 }, { "epoch": 0.01449, "grad_norm": 0.6344501403492658, "learning_rate": 0.003, "loss": 4.3302, "step": 1449 }, { "epoch": 0.0145, "grad_norm": 0.5404283744793761, "learning_rate": 0.003, "loss": 4.3401, "step": 1450 }, { "epoch": 0.01451, "grad_norm": 0.5381895515345636, "learning_rate": 0.003, "loss": 4.3181, "step": 1451 }, { "epoch": 0.01452, "grad_norm": 0.5085114651371503, "learning_rate": 0.003, "loss": 4.3176, "step": 1452 }, { "epoch": 0.01453, "grad_norm": 0.5297212710147206, "learning_rate": 0.003, "loss": 4.3302, "step": 1453 }, { "epoch": 0.01454, "grad_norm": 0.6082304279549751, "learning_rate": 0.003, "loss": 4.299, "step": 1454 }, { "epoch": 0.01455, "grad_norm": 0.799761378996593, "learning_rate": 0.003, "loss": 4.3379, "step": 1455 }, { "epoch": 0.01456, "grad_norm": 0.8200199061255905, "learning_rate": 0.003, "loss": 4.3248, "step": 1456 }, { "epoch": 0.01457, "grad_norm": 0.6729328173983412, "learning_rate": 0.003, "loss": 4.3054, "step": 1457 }, { "epoch": 0.01458, "grad_norm": 0.5761787071333488, "learning_rate": 0.003, "loss": 4.2955, "step": 1458 }, { "epoch": 0.01459, "grad_norm": 0.6468019060138231, "learning_rate": 0.003, "loss": 4.3289, "step": 1459 }, { "epoch": 0.0146, "grad_norm": 0.6012651007343053, "learning_rate": 0.003, "loss": 4.3283, "step": 1460 }, { "epoch": 0.01461, "grad_norm": 0.4832305210154182, "learning_rate": 0.003, "loss": 4.3037, "step": 1461 }, { "epoch": 0.01462, "grad_norm": 0.4933402571331549, "learning_rate": 0.003, "loss": 4.3088, "step": 1462 }, { "epoch": 0.01463, "grad_norm": 0.4774020866387011, "learning_rate": 0.003, "loss": 4.3072, "step": 1463 }, { "epoch": 0.01464, "grad_norm": 0.4604171826341042, "learning_rate": 0.003, "loss": 4.2821, "step": 1464 }, { "epoch": 0.01465, "grad_norm": 0.4294089797917102, "learning_rate": 0.003, "loss": 4.3245, "step": 1465 }, { "epoch": 0.01466, "grad_norm": 0.41929661999999984, "learning_rate": 0.003, "loss": 4.2919, "step": 1466 }, { "epoch": 0.01467, "grad_norm": 0.4391785760355703, "learning_rate": 0.003, "loss": 4.3283, "step": 1467 }, { "epoch": 0.01468, "grad_norm": 0.4538846759969807, "learning_rate": 0.003, "loss": 4.3037, "step": 1468 }, { "epoch": 0.01469, "grad_norm": 0.5119886030394242, "learning_rate": 0.003, "loss": 4.3053, "step": 1469 }, { "epoch": 0.0147, "grad_norm": 0.716327286811815, "learning_rate": 0.003, "loss": 4.3041, "step": 1470 }, { "epoch": 0.01471, "grad_norm": 0.9150052503540761, "learning_rate": 0.003, "loss": 4.3274, "step": 1471 }, { "epoch": 0.01472, "grad_norm": 0.7809983181295278, "learning_rate": 0.003, "loss": 4.3143, "step": 1472 }, { "epoch": 0.01473, "grad_norm": 0.8086105929105535, "learning_rate": 0.003, "loss": 4.3289, "step": 1473 }, { "epoch": 0.01474, "grad_norm": 0.8656063557310713, "learning_rate": 0.003, "loss": 4.3258, "step": 1474 }, { "epoch": 0.01475, "grad_norm": 0.8637306499434225, "learning_rate": 0.003, "loss": 4.3398, "step": 1475 }, { "epoch": 0.01476, "grad_norm": 0.6743755524690542, "learning_rate": 0.003, "loss": 4.3227, "step": 1476 }, { "epoch": 0.01477, "grad_norm": 0.6414161546100536, "learning_rate": 0.003, "loss": 4.3256, "step": 1477 }, { "epoch": 0.01478, "grad_norm": 0.7067557912668699, "learning_rate": 0.003, "loss": 4.3113, "step": 1478 }, { "epoch": 0.01479, "grad_norm": 0.7427717638657159, "learning_rate": 0.003, "loss": 4.3499, "step": 1479 }, { "epoch": 0.0148, "grad_norm": 0.7972486147312002, "learning_rate": 0.003, "loss": 4.3212, "step": 1480 }, { "epoch": 0.01481, "grad_norm": 0.8238319142257903, "learning_rate": 0.003, "loss": 4.3261, "step": 1481 }, { "epoch": 0.01482, "grad_norm": 0.7010334962030825, "learning_rate": 0.003, "loss": 4.3089, "step": 1482 }, { "epoch": 0.01483, "grad_norm": 0.6287754589698554, "learning_rate": 0.003, "loss": 4.303, "step": 1483 }, { "epoch": 0.01484, "grad_norm": 0.5953574922658559, "learning_rate": 0.003, "loss": 4.3534, "step": 1484 }, { "epoch": 0.01485, "grad_norm": 0.6859671225358986, "learning_rate": 0.003, "loss": 4.3172, "step": 1485 }, { "epoch": 0.01486, "grad_norm": 0.7393843625917039, "learning_rate": 0.003, "loss": 4.3181, "step": 1486 }, { "epoch": 0.01487, "grad_norm": 0.6948333501030851, "learning_rate": 0.003, "loss": 4.3225, "step": 1487 }, { "epoch": 0.01488, "grad_norm": 0.5806619385051136, "learning_rate": 0.003, "loss": 4.3259, "step": 1488 }, { "epoch": 0.01489, "grad_norm": 0.6342989775695094, "learning_rate": 0.003, "loss": 4.3221, "step": 1489 }, { "epoch": 0.0149, "grad_norm": 0.6704129451979153, "learning_rate": 0.003, "loss": 4.3147, "step": 1490 }, { "epoch": 0.01491, "grad_norm": 0.7017206728292057, "learning_rate": 0.003, "loss": 4.3305, "step": 1491 }, { "epoch": 0.01492, "grad_norm": 0.6813188063148864, "learning_rate": 0.003, "loss": 4.2961, "step": 1492 }, { "epoch": 0.01493, "grad_norm": 0.8525559699799811, "learning_rate": 0.003, "loss": 4.3324, "step": 1493 }, { "epoch": 0.01494, "grad_norm": 0.8687834663259932, "learning_rate": 0.003, "loss": 4.3213, "step": 1494 }, { "epoch": 0.01495, "grad_norm": 0.7274136348295218, "learning_rate": 0.003, "loss": 4.3243, "step": 1495 }, { "epoch": 0.01496, "grad_norm": 0.5434712910501038, "learning_rate": 0.003, "loss": 4.2923, "step": 1496 }, { "epoch": 0.01497, "grad_norm": 0.6148760530691155, "learning_rate": 0.003, "loss": 4.2701, "step": 1497 }, { "epoch": 0.01498, "grad_norm": 0.5665585769325068, "learning_rate": 0.003, "loss": 4.3224, "step": 1498 }, { "epoch": 0.01499, "grad_norm": 0.543044486095261, "learning_rate": 0.003, "loss": 4.3408, "step": 1499 }, { "epoch": 0.015, "grad_norm": 0.5868245294183819, "learning_rate": 0.003, "loss": 4.2952, "step": 1500 }, { "epoch": 0.01501, "grad_norm": 0.6488345831013783, "learning_rate": 0.003, "loss": 4.2943, "step": 1501 }, { "epoch": 0.01502, "grad_norm": 0.5560720951051904, "learning_rate": 0.003, "loss": 4.3322, "step": 1502 }, { "epoch": 0.01503, "grad_norm": 0.6170651368212409, "learning_rate": 0.003, "loss": 4.3142, "step": 1503 }, { "epoch": 0.01504, "grad_norm": 0.7253173696408437, "learning_rate": 0.003, "loss": 4.3199, "step": 1504 }, { "epoch": 0.01505, "grad_norm": 0.7573152828389973, "learning_rate": 0.003, "loss": 4.3067, "step": 1505 }, { "epoch": 0.01506, "grad_norm": 0.7119425101662294, "learning_rate": 0.003, "loss": 4.3233, "step": 1506 }, { "epoch": 0.01507, "grad_norm": 0.7408719494100054, "learning_rate": 0.003, "loss": 4.3394, "step": 1507 }, { "epoch": 0.01508, "grad_norm": 0.6023053184100382, "learning_rate": 0.003, "loss": 4.3108, "step": 1508 }, { "epoch": 0.01509, "grad_norm": 0.5054062616818538, "learning_rate": 0.003, "loss": 4.3257, "step": 1509 }, { "epoch": 0.0151, "grad_norm": 0.4440427822667444, "learning_rate": 0.003, "loss": 4.2952, "step": 1510 }, { "epoch": 0.01511, "grad_norm": 0.503043960784533, "learning_rate": 0.003, "loss": 4.2838, "step": 1511 }, { "epoch": 0.01512, "grad_norm": 0.4478910644887885, "learning_rate": 0.003, "loss": 4.2937, "step": 1512 }, { "epoch": 0.01513, "grad_norm": 0.46313433916934627, "learning_rate": 0.003, "loss": 4.2989, "step": 1513 }, { "epoch": 0.01514, "grad_norm": 0.5237062692888148, "learning_rate": 0.003, "loss": 4.3066, "step": 1514 }, { "epoch": 0.01515, "grad_norm": 0.6686824710181497, "learning_rate": 0.003, "loss": 4.3056, "step": 1515 }, { "epoch": 0.01516, "grad_norm": 0.8162150436928566, "learning_rate": 0.003, "loss": 4.3198, "step": 1516 }, { "epoch": 0.01517, "grad_norm": 0.8250386268904906, "learning_rate": 0.003, "loss": 4.274, "step": 1517 }, { "epoch": 0.01518, "grad_norm": 0.8551846044396628, "learning_rate": 0.003, "loss": 4.3102, "step": 1518 }, { "epoch": 0.01519, "grad_norm": 0.7778994522735176, "learning_rate": 0.003, "loss": 4.3471, "step": 1519 }, { "epoch": 0.0152, "grad_norm": 0.6407333772962789, "learning_rate": 0.003, "loss": 4.2901, "step": 1520 }, { "epoch": 0.01521, "grad_norm": 0.590099483112598, "learning_rate": 0.003, "loss": 4.3053, "step": 1521 }, { "epoch": 0.01522, "grad_norm": 0.6220605799864671, "learning_rate": 0.003, "loss": 4.3014, "step": 1522 }, { "epoch": 0.01523, "grad_norm": 0.6378640000910417, "learning_rate": 0.003, "loss": 4.2957, "step": 1523 }, { "epoch": 0.01524, "grad_norm": 0.6800153761279158, "learning_rate": 0.003, "loss": 4.3332, "step": 1524 }, { "epoch": 0.01525, "grad_norm": 0.6552309843606878, "learning_rate": 0.003, "loss": 4.3306, "step": 1525 }, { "epoch": 0.01526, "grad_norm": 0.5832236683536917, "learning_rate": 0.003, "loss": 4.3154, "step": 1526 }, { "epoch": 0.01527, "grad_norm": 0.5923964968536319, "learning_rate": 0.003, "loss": 4.2868, "step": 1527 }, { "epoch": 0.01528, "grad_norm": 0.6438689697714796, "learning_rate": 0.003, "loss": 4.3067, "step": 1528 }, { "epoch": 0.01529, "grad_norm": 0.7863523458278316, "learning_rate": 0.003, "loss": 4.3071, "step": 1529 }, { "epoch": 0.0153, "grad_norm": 0.8716684688082496, "learning_rate": 0.003, "loss": 4.3236, "step": 1530 }, { "epoch": 0.01531, "grad_norm": 0.7632243352103186, "learning_rate": 0.003, "loss": 4.314, "step": 1531 }, { "epoch": 0.01532, "grad_norm": 0.5830764209700208, "learning_rate": 0.003, "loss": 4.2863, "step": 1532 }, { "epoch": 0.01533, "grad_norm": 0.6925578088506626, "learning_rate": 0.003, "loss": 4.3193, "step": 1533 }, { "epoch": 0.01534, "grad_norm": 0.6884106279584329, "learning_rate": 0.003, "loss": 4.3131, "step": 1534 }, { "epoch": 0.01535, "grad_norm": 0.612086877528724, "learning_rate": 0.003, "loss": 4.3216, "step": 1535 }, { "epoch": 0.01536, "grad_norm": 0.6277711721176384, "learning_rate": 0.003, "loss": 4.3039, "step": 1536 }, { "epoch": 0.01537, "grad_norm": 0.7930101311629847, "learning_rate": 0.003, "loss": 4.3078, "step": 1537 }, { "epoch": 0.01538, "grad_norm": 0.8987557100700295, "learning_rate": 0.003, "loss": 4.2971, "step": 1538 }, { "epoch": 0.01539, "grad_norm": 0.9391352557198052, "learning_rate": 0.003, "loss": 4.334, "step": 1539 }, { "epoch": 0.0154, "grad_norm": 0.9440902063787158, "learning_rate": 0.003, "loss": 4.3487, "step": 1540 }, { "epoch": 0.01541, "grad_norm": 0.9127476278073733, "learning_rate": 0.003, "loss": 4.328, "step": 1541 }, { "epoch": 0.01542, "grad_norm": 0.7753546344208377, "learning_rate": 0.003, "loss": 4.3528, "step": 1542 }, { "epoch": 0.01543, "grad_norm": 0.7317390533493494, "learning_rate": 0.003, "loss": 4.3318, "step": 1543 }, { "epoch": 0.01544, "grad_norm": 0.5946634053593371, "learning_rate": 0.003, "loss": 4.3162, "step": 1544 }, { "epoch": 0.01545, "grad_norm": 0.49808121797063876, "learning_rate": 0.003, "loss": 4.375, "step": 1545 }, { "epoch": 0.01546, "grad_norm": 0.5189277397467036, "learning_rate": 0.003, "loss": 4.3435, "step": 1546 }, { "epoch": 0.01547, "grad_norm": 0.5075409907094708, "learning_rate": 0.003, "loss": 4.3431, "step": 1547 }, { "epoch": 0.01548, "grad_norm": 0.4269782583856954, "learning_rate": 0.003, "loss": 4.3016, "step": 1548 }, { "epoch": 0.01549, "grad_norm": 0.44259739876123444, "learning_rate": 0.003, "loss": 4.2898, "step": 1549 }, { "epoch": 0.0155, "grad_norm": 0.39476104723697186, "learning_rate": 0.003, "loss": 4.3072, "step": 1550 }, { "epoch": 0.01551, "grad_norm": 0.39719568498427116, "learning_rate": 0.003, "loss": 4.2836, "step": 1551 }, { "epoch": 0.01552, "grad_norm": 0.5112856286097273, "learning_rate": 0.003, "loss": 4.3059, "step": 1552 }, { "epoch": 0.01553, "grad_norm": 0.6994822828905559, "learning_rate": 0.003, "loss": 4.3366, "step": 1553 }, { "epoch": 0.01554, "grad_norm": 0.9918562900778292, "learning_rate": 0.003, "loss": 4.3337, "step": 1554 }, { "epoch": 0.01555, "grad_norm": 0.8597749628867734, "learning_rate": 0.003, "loss": 4.3154, "step": 1555 }, { "epoch": 0.01556, "grad_norm": 0.4859845337872918, "learning_rate": 0.003, "loss": 4.3299, "step": 1556 }, { "epoch": 0.01557, "grad_norm": 0.8105266239108289, "learning_rate": 0.003, "loss": 4.317, "step": 1557 }, { "epoch": 0.01558, "grad_norm": 0.6797261746777549, "learning_rate": 0.003, "loss": 4.3032, "step": 1558 }, { "epoch": 0.01559, "grad_norm": 0.5540991472031954, "learning_rate": 0.003, "loss": 4.3349, "step": 1559 }, { "epoch": 0.0156, "grad_norm": 0.5222478848499021, "learning_rate": 0.003, "loss": 4.3207, "step": 1560 }, { "epoch": 0.01561, "grad_norm": 0.4547690370956997, "learning_rate": 0.003, "loss": 4.3006, "step": 1561 }, { "epoch": 0.01562, "grad_norm": 0.3960981969234329, "learning_rate": 0.003, "loss": 4.2916, "step": 1562 }, { "epoch": 0.01563, "grad_norm": 0.4365008440994641, "learning_rate": 0.003, "loss": 4.2998, "step": 1563 }, { "epoch": 0.01564, "grad_norm": 0.4153235041333595, "learning_rate": 0.003, "loss": 4.3237, "step": 1564 }, { "epoch": 0.01565, "grad_norm": 0.4172430370227481, "learning_rate": 0.003, "loss": 4.2984, "step": 1565 }, { "epoch": 0.01566, "grad_norm": 0.4030631404816366, "learning_rate": 0.003, "loss": 4.3153, "step": 1566 }, { "epoch": 0.01567, "grad_norm": 0.4006086575381243, "learning_rate": 0.003, "loss": 4.3082, "step": 1567 }, { "epoch": 0.01568, "grad_norm": 0.4692595605101829, "learning_rate": 0.003, "loss": 4.2921, "step": 1568 }, { "epoch": 0.01569, "grad_norm": 0.5156092105425303, "learning_rate": 0.003, "loss": 4.2975, "step": 1569 }, { "epoch": 0.0157, "grad_norm": 0.5574730520335043, "learning_rate": 0.003, "loss": 4.3166, "step": 1570 }, { "epoch": 0.01571, "grad_norm": 0.7147942584489586, "learning_rate": 0.003, "loss": 4.306, "step": 1571 }, { "epoch": 0.01572, "grad_norm": 0.8176596681794123, "learning_rate": 0.003, "loss": 4.3202, "step": 1572 }, { "epoch": 0.01573, "grad_norm": 0.8859299700656846, "learning_rate": 0.003, "loss": 4.311, "step": 1573 }, { "epoch": 0.01574, "grad_norm": 1.0167563301248608, "learning_rate": 0.003, "loss": 4.3365, "step": 1574 }, { "epoch": 0.01575, "grad_norm": 0.9276915846212054, "learning_rate": 0.003, "loss": 4.3012, "step": 1575 }, { "epoch": 0.01576, "grad_norm": 0.7519027259067711, "learning_rate": 0.003, "loss": 4.317, "step": 1576 }, { "epoch": 0.01577, "grad_norm": 0.5990656751852772, "learning_rate": 0.003, "loss": 4.2743, "step": 1577 }, { "epoch": 0.01578, "grad_norm": 0.652141379980981, "learning_rate": 0.003, "loss": 4.3047, "step": 1578 }, { "epoch": 0.01579, "grad_norm": 0.6130941880481341, "learning_rate": 0.003, "loss": 4.3001, "step": 1579 }, { "epoch": 0.0158, "grad_norm": 0.6566738181349618, "learning_rate": 0.003, "loss": 4.3101, "step": 1580 }, { "epoch": 0.01581, "grad_norm": 0.5602300919728743, "learning_rate": 0.003, "loss": 4.3154, "step": 1581 }, { "epoch": 0.01582, "grad_norm": 0.6614515024819504, "learning_rate": 0.003, "loss": 4.3018, "step": 1582 }, { "epoch": 0.01583, "grad_norm": 0.6685116238668469, "learning_rate": 0.003, "loss": 4.2884, "step": 1583 }, { "epoch": 0.01584, "grad_norm": 0.7663983884039552, "learning_rate": 0.003, "loss": 4.3227, "step": 1584 }, { "epoch": 0.01585, "grad_norm": 0.8471607724061854, "learning_rate": 0.003, "loss": 4.3183, "step": 1585 }, { "epoch": 0.01586, "grad_norm": 0.8029205674223586, "learning_rate": 0.003, "loss": 4.2999, "step": 1586 }, { "epoch": 0.01587, "grad_norm": 0.7335558361580696, "learning_rate": 0.003, "loss": 4.3092, "step": 1587 }, { "epoch": 0.01588, "grad_norm": 0.6576426322376321, "learning_rate": 0.003, "loss": 4.3341, "step": 1588 }, { "epoch": 0.01589, "grad_norm": 0.7314604361721292, "learning_rate": 0.003, "loss": 4.328, "step": 1589 }, { "epoch": 0.0159, "grad_norm": 0.7484813941023133, "learning_rate": 0.003, "loss": 4.323, "step": 1590 }, { "epoch": 0.01591, "grad_norm": 0.6836450786052791, "learning_rate": 0.003, "loss": 4.3264, "step": 1591 }, { "epoch": 0.01592, "grad_norm": 0.6522043667699231, "learning_rate": 0.003, "loss": 4.3127, "step": 1592 }, { "epoch": 0.01593, "grad_norm": 0.6311012555499182, "learning_rate": 0.003, "loss": 4.3174, "step": 1593 }, { "epoch": 0.01594, "grad_norm": 0.5510623120073839, "learning_rate": 0.003, "loss": 4.3009, "step": 1594 }, { "epoch": 0.01595, "grad_norm": 0.48793948155833994, "learning_rate": 0.003, "loss": 4.3248, "step": 1595 }, { "epoch": 0.01596, "grad_norm": 0.42717068818596066, "learning_rate": 0.003, "loss": 4.3142, "step": 1596 }, { "epoch": 0.01597, "grad_norm": 0.43677376148637953, "learning_rate": 0.003, "loss": 4.2773, "step": 1597 }, { "epoch": 0.01598, "grad_norm": 0.47981748858846524, "learning_rate": 0.003, "loss": 4.2706, "step": 1598 }, { "epoch": 0.01599, "grad_norm": 0.5068183957178387, "learning_rate": 0.003, "loss": 4.2944, "step": 1599 }, { "epoch": 0.016, "grad_norm": 0.570411039839509, "learning_rate": 0.003, "loss": 4.261, "step": 1600 }, { "epoch": 0.01601, "grad_norm": 0.7428825863420937, "learning_rate": 0.003, "loss": 4.3164, "step": 1601 }, { "epoch": 0.01602, "grad_norm": 1.0316614845943484, "learning_rate": 0.003, "loss": 4.3056, "step": 1602 }, { "epoch": 0.01603, "grad_norm": 0.9265092099146023, "learning_rate": 0.003, "loss": 4.3273, "step": 1603 }, { "epoch": 0.01604, "grad_norm": 0.6714138671323193, "learning_rate": 0.003, "loss": 4.3099, "step": 1604 }, { "epoch": 0.01605, "grad_norm": 0.7449186454711473, "learning_rate": 0.003, "loss": 4.2937, "step": 1605 }, { "epoch": 0.01606, "grad_norm": 0.6551508425933098, "learning_rate": 0.003, "loss": 4.29, "step": 1606 }, { "epoch": 0.01607, "grad_norm": 0.6368386814210635, "learning_rate": 0.003, "loss": 4.3196, "step": 1607 }, { "epoch": 0.01608, "grad_norm": 0.6007545339051799, "learning_rate": 0.003, "loss": 4.287, "step": 1608 }, { "epoch": 0.01609, "grad_norm": 0.5685481620694516, "learning_rate": 0.003, "loss": 4.2973, "step": 1609 }, { "epoch": 0.0161, "grad_norm": 0.5084184586189073, "learning_rate": 0.003, "loss": 4.2592, "step": 1610 }, { "epoch": 0.01611, "grad_norm": 0.4840833495127669, "learning_rate": 0.003, "loss": 4.2802, "step": 1611 }, { "epoch": 0.01612, "grad_norm": 0.48942028074279065, "learning_rate": 0.003, "loss": 4.2702, "step": 1612 }, { "epoch": 0.01613, "grad_norm": 0.5011285237133881, "learning_rate": 0.003, "loss": 4.315, "step": 1613 }, { "epoch": 0.01614, "grad_norm": 0.43995362094592044, "learning_rate": 0.003, "loss": 4.3364, "step": 1614 }, { "epoch": 0.01615, "grad_norm": 0.42434808753845177, "learning_rate": 0.003, "loss": 4.3023, "step": 1615 }, { "epoch": 0.01616, "grad_norm": 0.40673555464346545, "learning_rate": 0.003, "loss": 4.2826, "step": 1616 }, { "epoch": 0.01617, "grad_norm": 0.3578337742407294, "learning_rate": 0.003, "loss": 4.2706, "step": 1617 }, { "epoch": 0.01618, "grad_norm": 0.41081917922167766, "learning_rate": 0.003, "loss": 4.2753, "step": 1618 }, { "epoch": 0.01619, "grad_norm": 0.47886760935645833, "learning_rate": 0.003, "loss": 4.2659, "step": 1619 }, { "epoch": 0.0162, "grad_norm": 0.6222321239813, "learning_rate": 0.003, "loss": 4.2767, "step": 1620 }, { "epoch": 0.01621, "grad_norm": 0.6903491585345826, "learning_rate": 0.003, "loss": 4.2786, "step": 1621 }, { "epoch": 0.01622, "grad_norm": 0.7573622136296652, "learning_rate": 0.003, "loss": 4.3118, "step": 1622 }, { "epoch": 0.01623, "grad_norm": 0.9256995453444036, "learning_rate": 0.003, "loss": 4.3026, "step": 1623 }, { "epoch": 0.01624, "grad_norm": 0.9558305805696413, "learning_rate": 0.003, "loss": 4.3216, "step": 1624 }, { "epoch": 0.01625, "grad_norm": 0.8114385487348762, "learning_rate": 0.003, "loss": 4.2898, "step": 1625 }, { "epoch": 0.01626, "grad_norm": 0.8322826717160557, "learning_rate": 0.003, "loss": 4.3177, "step": 1626 }, { "epoch": 0.01627, "grad_norm": 0.7727713367753223, "learning_rate": 0.003, "loss": 4.2816, "step": 1627 }, { "epoch": 0.01628, "grad_norm": 0.7109224896248458, "learning_rate": 0.003, "loss": 4.2929, "step": 1628 }, { "epoch": 0.01629, "grad_norm": 0.560897124973334, "learning_rate": 0.003, "loss": 4.2858, "step": 1629 }, { "epoch": 0.0163, "grad_norm": 0.6779380557060044, "learning_rate": 0.003, "loss": 4.2826, "step": 1630 }, { "epoch": 0.01631, "grad_norm": 0.6886638086492523, "learning_rate": 0.003, "loss": 4.3011, "step": 1631 }, { "epoch": 0.01632, "grad_norm": 0.6589349584439033, "learning_rate": 0.003, "loss": 4.3124, "step": 1632 }, { "epoch": 0.01633, "grad_norm": 0.7155276302347654, "learning_rate": 0.003, "loss": 4.2893, "step": 1633 }, { "epoch": 0.01634, "grad_norm": 0.7783909364325312, "learning_rate": 0.003, "loss": 4.3094, "step": 1634 }, { "epoch": 0.01635, "grad_norm": 0.8288192042951168, "learning_rate": 0.003, "loss": 4.2952, "step": 1635 }, { "epoch": 0.01636, "grad_norm": 0.8309682765627266, "learning_rate": 0.003, "loss": 4.3157, "step": 1636 }, { "epoch": 0.01637, "grad_norm": 0.7885672766922769, "learning_rate": 0.003, "loss": 4.2985, "step": 1637 }, { "epoch": 0.01638, "grad_norm": 0.6820787883112478, "learning_rate": 0.003, "loss": 4.3026, "step": 1638 }, { "epoch": 0.01639, "grad_norm": 0.5912531628997083, "learning_rate": 0.003, "loss": 4.3221, "step": 1639 }, { "epoch": 0.0164, "grad_norm": 0.7081315099543188, "learning_rate": 0.003, "loss": 4.3062, "step": 1640 }, { "epoch": 0.01641, "grad_norm": 0.7390837101488571, "learning_rate": 0.003, "loss": 4.279, "step": 1641 }, { "epoch": 0.01642, "grad_norm": 0.8019499352702314, "learning_rate": 0.003, "loss": 4.3244, "step": 1642 }, { "epoch": 0.01643, "grad_norm": 0.7922382978586373, "learning_rate": 0.003, "loss": 4.3126, "step": 1643 }, { "epoch": 0.01644, "grad_norm": 0.7260441686104165, "learning_rate": 0.003, "loss": 4.3282, "step": 1644 }, { "epoch": 0.01645, "grad_norm": 0.5765166219346322, "learning_rate": 0.003, "loss": 4.2686, "step": 1645 }, { "epoch": 0.01646, "grad_norm": 0.4846869130781467, "learning_rate": 0.003, "loss": 4.298, "step": 1646 }, { "epoch": 0.01647, "grad_norm": 0.4324516045425632, "learning_rate": 0.003, "loss": 4.294, "step": 1647 }, { "epoch": 0.01648, "grad_norm": 0.45801960619516824, "learning_rate": 0.003, "loss": 4.2725, "step": 1648 }, { "epoch": 0.01649, "grad_norm": 0.4519054523297515, "learning_rate": 0.003, "loss": 4.3034, "step": 1649 }, { "epoch": 0.0165, "grad_norm": 0.5030069407293299, "learning_rate": 0.003, "loss": 4.2846, "step": 1650 }, { "epoch": 0.01651, "grad_norm": 0.7338143580585036, "learning_rate": 0.003, "loss": 4.2988, "step": 1651 }, { "epoch": 0.01652, "grad_norm": 0.9179472565177694, "learning_rate": 0.003, "loss": 4.2688, "step": 1652 }, { "epoch": 0.01653, "grad_norm": 0.7974402781604697, "learning_rate": 0.003, "loss": 4.307, "step": 1653 }, { "epoch": 0.01654, "grad_norm": 0.8094858670947136, "learning_rate": 0.003, "loss": 4.2972, "step": 1654 }, { "epoch": 0.01655, "grad_norm": 0.8204525750124729, "learning_rate": 0.003, "loss": 4.313, "step": 1655 }, { "epoch": 0.01656, "grad_norm": 0.6293302149986666, "learning_rate": 0.003, "loss": 4.271, "step": 1656 }, { "epoch": 0.01657, "grad_norm": 0.5706278835378605, "learning_rate": 0.003, "loss": 4.293, "step": 1657 }, { "epoch": 0.01658, "grad_norm": 0.6110254736121277, "learning_rate": 0.003, "loss": 4.2809, "step": 1658 }, { "epoch": 0.01659, "grad_norm": 0.4984603935459828, "learning_rate": 0.003, "loss": 4.2824, "step": 1659 }, { "epoch": 0.0166, "grad_norm": 0.46804303085612786, "learning_rate": 0.003, "loss": 4.2649, "step": 1660 }, { "epoch": 0.01661, "grad_norm": 0.48879681023532395, "learning_rate": 0.003, "loss": 4.2671, "step": 1661 }, { "epoch": 0.01662, "grad_norm": 0.5571765680768687, "learning_rate": 0.003, "loss": 4.2719, "step": 1662 }, { "epoch": 0.01663, "grad_norm": 0.532882366154707, "learning_rate": 0.003, "loss": 4.294, "step": 1663 }, { "epoch": 0.01664, "grad_norm": 0.5088338427561463, "learning_rate": 0.003, "loss": 4.3094, "step": 1664 }, { "epoch": 0.01665, "grad_norm": 0.4963301596331138, "learning_rate": 0.003, "loss": 4.2712, "step": 1665 }, { "epoch": 0.01666, "grad_norm": 0.5472162068898109, "learning_rate": 0.003, "loss": 4.2732, "step": 1666 }, { "epoch": 0.01667, "grad_norm": 0.6863444215859392, "learning_rate": 0.003, "loss": 4.2951, "step": 1667 }, { "epoch": 0.01668, "grad_norm": 0.7945747889757943, "learning_rate": 0.003, "loss": 4.3019, "step": 1668 }, { "epoch": 0.01669, "grad_norm": 0.7029756676891371, "learning_rate": 0.003, "loss": 4.2868, "step": 1669 }, { "epoch": 0.0167, "grad_norm": 0.564779057565769, "learning_rate": 0.003, "loss": 4.2701, "step": 1670 }, { "epoch": 0.01671, "grad_norm": 0.5946383928046679, "learning_rate": 0.003, "loss": 4.2602, "step": 1671 }, { "epoch": 0.01672, "grad_norm": 0.7328231067257597, "learning_rate": 0.003, "loss": 4.2779, "step": 1672 }, { "epoch": 0.01673, "grad_norm": 0.7730808965686631, "learning_rate": 0.003, "loss": 4.2759, "step": 1673 }, { "epoch": 0.01674, "grad_norm": 0.8250670120229329, "learning_rate": 0.003, "loss": 4.3013, "step": 1674 }, { "epoch": 0.01675, "grad_norm": 0.7145691985141881, "learning_rate": 0.003, "loss": 4.2806, "step": 1675 }, { "epoch": 0.01676, "grad_norm": 0.6268084931910631, "learning_rate": 0.003, "loss": 4.2872, "step": 1676 }, { "epoch": 0.01677, "grad_norm": 0.6170941899124056, "learning_rate": 0.003, "loss": 4.2772, "step": 1677 }, { "epoch": 0.01678, "grad_norm": 0.5951034196601164, "learning_rate": 0.003, "loss": 4.2879, "step": 1678 }, { "epoch": 0.01679, "grad_norm": 0.5254502211849061, "learning_rate": 0.003, "loss": 4.2679, "step": 1679 }, { "epoch": 0.0168, "grad_norm": 0.6096842133031645, "learning_rate": 0.003, "loss": 4.2892, "step": 1680 }, { "epoch": 0.01681, "grad_norm": 0.6218177000062469, "learning_rate": 0.003, "loss": 4.2892, "step": 1681 }, { "epoch": 0.01682, "grad_norm": 0.5832829705689185, "learning_rate": 0.003, "loss": 4.2817, "step": 1682 }, { "epoch": 0.01683, "grad_norm": 0.5910768318994952, "learning_rate": 0.003, "loss": 4.2959, "step": 1683 }, { "epoch": 0.01684, "grad_norm": 0.6050344929193974, "learning_rate": 0.003, "loss": 4.2707, "step": 1684 }, { "epoch": 0.01685, "grad_norm": 0.6382986956510986, "learning_rate": 0.003, "loss": 4.2898, "step": 1685 }, { "epoch": 0.01686, "grad_norm": 0.8077022270811443, "learning_rate": 0.003, "loss": 4.2942, "step": 1686 }, { "epoch": 0.01687, "grad_norm": 0.8169553039156104, "learning_rate": 0.003, "loss": 4.2925, "step": 1687 }, { "epoch": 0.01688, "grad_norm": 0.8368087154638648, "learning_rate": 0.003, "loss": 4.2843, "step": 1688 }, { "epoch": 0.01689, "grad_norm": 0.869445348031783, "learning_rate": 0.003, "loss": 4.3002, "step": 1689 }, { "epoch": 0.0169, "grad_norm": 0.9062293741017381, "learning_rate": 0.003, "loss": 4.2906, "step": 1690 }, { "epoch": 0.01691, "grad_norm": 0.8487759964259872, "learning_rate": 0.003, "loss": 4.3075, "step": 1691 }, { "epoch": 0.01692, "grad_norm": 0.8483641489848167, "learning_rate": 0.003, "loss": 4.3088, "step": 1692 }, { "epoch": 0.01693, "grad_norm": 0.8279189520542338, "learning_rate": 0.003, "loss": 4.3369, "step": 1693 }, { "epoch": 0.01694, "grad_norm": 0.8089873138093994, "learning_rate": 0.003, "loss": 4.2887, "step": 1694 }, { "epoch": 0.01695, "grad_norm": 0.9455670720071184, "learning_rate": 0.003, "loss": 4.3248, "step": 1695 }, { "epoch": 0.01696, "grad_norm": 1.064016692244757, "learning_rate": 0.003, "loss": 4.3315, "step": 1696 }, { "epoch": 0.01697, "grad_norm": 0.9264587036694925, "learning_rate": 0.003, "loss": 4.302, "step": 1697 }, { "epoch": 0.01698, "grad_norm": 0.6997503897611733, "learning_rate": 0.003, "loss": 4.289, "step": 1698 }, { "epoch": 0.01699, "grad_norm": 0.6394457308265625, "learning_rate": 0.003, "loss": 4.2969, "step": 1699 }, { "epoch": 0.017, "grad_norm": 0.5891910529153762, "learning_rate": 0.003, "loss": 4.3094, "step": 1700 }, { "epoch": 0.01701, "grad_norm": 0.6435550929707158, "learning_rate": 0.003, "loss": 4.2939, "step": 1701 }, { "epoch": 0.01702, "grad_norm": 0.6810306253220882, "learning_rate": 0.003, "loss": 4.3163, "step": 1702 }, { "epoch": 0.01703, "grad_norm": 0.6849363850272787, "learning_rate": 0.003, "loss": 4.3034, "step": 1703 }, { "epoch": 0.01704, "grad_norm": 0.6708561417744424, "learning_rate": 0.003, "loss": 4.2869, "step": 1704 }, { "epoch": 0.01705, "grad_norm": 0.5968699215981799, "learning_rate": 0.003, "loss": 4.2879, "step": 1705 }, { "epoch": 0.01706, "grad_norm": 0.505702133399309, "learning_rate": 0.003, "loss": 4.2792, "step": 1706 }, { "epoch": 0.01707, "grad_norm": 0.5302615087387699, "learning_rate": 0.003, "loss": 4.3011, "step": 1707 }, { "epoch": 0.01708, "grad_norm": 0.5210003345198112, "learning_rate": 0.003, "loss": 4.2762, "step": 1708 }, { "epoch": 0.01709, "grad_norm": 0.46584995061994, "learning_rate": 0.003, "loss": 4.2953, "step": 1709 }, { "epoch": 0.0171, "grad_norm": 0.42345394468243425, "learning_rate": 0.003, "loss": 4.2414, "step": 1710 }, { "epoch": 0.01711, "grad_norm": 0.43754438467909607, "learning_rate": 0.003, "loss": 4.2717, "step": 1711 }, { "epoch": 0.01712, "grad_norm": 0.41267034274673536, "learning_rate": 0.003, "loss": 4.2801, "step": 1712 }, { "epoch": 0.01713, "grad_norm": 0.3568885732041511, "learning_rate": 0.003, "loss": 4.2523, "step": 1713 }, { "epoch": 0.01714, "grad_norm": 0.3457637645495226, "learning_rate": 0.003, "loss": 4.2876, "step": 1714 }, { "epoch": 0.01715, "grad_norm": 0.3722023563902025, "learning_rate": 0.003, "loss": 4.2958, "step": 1715 }, { "epoch": 0.01716, "grad_norm": 0.3390050605620623, "learning_rate": 0.003, "loss": 4.2697, "step": 1716 }, { "epoch": 0.01717, "grad_norm": 0.4093688383515523, "learning_rate": 0.003, "loss": 4.3028, "step": 1717 }, { "epoch": 0.01718, "grad_norm": 0.5445666194239754, "learning_rate": 0.003, "loss": 4.2735, "step": 1718 }, { "epoch": 0.01719, "grad_norm": 0.8818554274614839, "learning_rate": 0.003, "loss": 4.3037, "step": 1719 }, { "epoch": 0.0172, "grad_norm": 1.123998698583038, "learning_rate": 0.003, "loss": 4.2879, "step": 1720 }, { "epoch": 0.01721, "grad_norm": 0.6518188394707584, "learning_rate": 0.003, "loss": 4.2815, "step": 1721 }, { "epoch": 0.01722, "grad_norm": 0.7473961890284897, "learning_rate": 0.003, "loss": 4.281, "step": 1722 }, { "epoch": 0.01723, "grad_norm": 0.7241082541176558, "learning_rate": 0.003, "loss": 4.2942, "step": 1723 }, { "epoch": 0.01724, "grad_norm": 0.5406243852034243, "learning_rate": 0.003, "loss": 4.2839, "step": 1724 }, { "epoch": 0.01725, "grad_norm": 0.7661047717663526, "learning_rate": 0.003, "loss": 4.2758, "step": 1725 }, { "epoch": 0.01726, "grad_norm": 0.6519733535814134, "learning_rate": 0.003, "loss": 4.2678, "step": 1726 }, { "epoch": 0.01727, "grad_norm": 0.5095447102684948, "learning_rate": 0.003, "loss": 4.2681, "step": 1727 }, { "epoch": 0.01728, "grad_norm": 0.5716779713985857, "learning_rate": 0.003, "loss": 4.2703, "step": 1728 }, { "epoch": 0.01729, "grad_norm": 0.5506488662519434, "learning_rate": 0.003, "loss": 4.2777, "step": 1729 }, { "epoch": 0.0173, "grad_norm": 0.4864830783539529, "learning_rate": 0.003, "loss": 4.2498, "step": 1730 }, { "epoch": 0.01731, "grad_norm": 0.49289174915320866, "learning_rate": 0.003, "loss": 4.2691, "step": 1731 }, { "epoch": 0.01732, "grad_norm": 0.5258408981010323, "learning_rate": 0.003, "loss": 4.2782, "step": 1732 }, { "epoch": 0.01733, "grad_norm": 0.5689089752248919, "learning_rate": 0.003, "loss": 4.2578, "step": 1733 }, { "epoch": 0.01734, "grad_norm": 0.5515499021599949, "learning_rate": 0.003, "loss": 4.2704, "step": 1734 }, { "epoch": 0.01735, "grad_norm": 0.5101191368780025, "learning_rate": 0.003, "loss": 4.2394, "step": 1735 }, { "epoch": 0.01736, "grad_norm": 0.5538259418110674, "learning_rate": 0.003, "loss": 4.2405, "step": 1736 }, { "epoch": 0.01737, "grad_norm": 0.6368154932756357, "learning_rate": 0.003, "loss": 4.273, "step": 1737 }, { "epoch": 0.01738, "grad_norm": 0.6302527957263243, "learning_rate": 0.003, "loss": 4.2633, "step": 1738 }, { "epoch": 0.01739, "grad_norm": 0.5659864529447296, "learning_rate": 0.003, "loss": 4.2757, "step": 1739 }, { "epoch": 0.0174, "grad_norm": 0.6034857544005698, "learning_rate": 0.003, "loss": 4.278, "step": 1740 }, { "epoch": 0.01741, "grad_norm": 0.7023281656557057, "learning_rate": 0.003, "loss": 4.2883, "step": 1741 }, { "epoch": 0.01742, "grad_norm": 0.7127714842932543, "learning_rate": 0.003, "loss": 4.2766, "step": 1742 }, { "epoch": 0.01743, "grad_norm": 0.7178940660267726, "learning_rate": 0.003, "loss": 4.2889, "step": 1743 }, { "epoch": 0.01744, "grad_norm": 0.770440209657255, "learning_rate": 0.003, "loss": 4.2773, "step": 1744 }, { "epoch": 0.01745, "grad_norm": 0.7362175602563142, "learning_rate": 0.003, "loss": 4.2484, "step": 1745 }, { "epoch": 0.01746, "grad_norm": 0.7588375597390518, "learning_rate": 0.003, "loss": 4.2719, "step": 1746 }, { "epoch": 0.01747, "grad_norm": 0.6839956521205998, "learning_rate": 0.003, "loss": 4.267, "step": 1747 }, { "epoch": 0.01748, "grad_norm": 0.6088836411369196, "learning_rate": 0.003, "loss": 4.2555, "step": 1748 }, { "epoch": 0.01749, "grad_norm": 0.548971540706334, "learning_rate": 0.003, "loss": 4.2516, "step": 1749 }, { "epoch": 0.0175, "grad_norm": 0.5440778124239496, "learning_rate": 0.003, "loss": 4.2665, "step": 1750 }, { "epoch": 0.01751, "grad_norm": 0.5918941393242653, "learning_rate": 0.003, "loss": 4.2593, "step": 1751 }, { "epoch": 0.01752, "grad_norm": 0.6455620934648258, "learning_rate": 0.003, "loss": 4.3014, "step": 1752 }, { "epoch": 0.01753, "grad_norm": 0.6781800073296501, "learning_rate": 0.003, "loss": 4.2535, "step": 1753 }, { "epoch": 0.01754, "grad_norm": 0.6882980076107725, "learning_rate": 0.003, "loss": 4.2853, "step": 1754 }, { "epoch": 0.01755, "grad_norm": 0.7961632146248975, "learning_rate": 0.003, "loss": 4.233, "step": 1755 }, { "epoch": 0.01756, "grad_norm": 0.8494780429534167, "learning_rate": 0.003, "loss": 4.314, "step": 1756 }, { "epoch": 0.01757, "grad_norm": 0.9294824362906122, "learning_rate": 0.003, "loss": 4.2987, "step": 1757 }, { "epoch": 0.01758, "grad_norm": 0.852175947314296, "learning_rate": 0.003, "loss": 4.3126, "step": 1758 }, { "epoch": 0.01759, "grad_norm": 0.7347749740307682, "learning_rate": 0.003, "loss": 4.2719, "step": 1759 }, { "epoch": 0.0176, "grad_norm": 0.832142747460363, "learning_rate": 0.003, "loss": 4.2994, "step": 1760 }, { "epoch": 0.01761, "grad_norm": 0.8686282951443787, "learning_rate": 0.003, "loss": 4.2965, "step": 1761 }, { "epoch": 0.01762, "grad_norm": 0.9408046252281581, "learning_rate": 0.003, "loss": 4.3172, "step": 1762 }, { "epoch": 0.01763, "grad_norm": 0.8594428065536988, "learning_rate": 0.003, "loss": 4.3092, "step": 1763 }, { "epoch": 0.01764, "grad_norm": 0.7749341487974934, "learning_rate": 0.003, "loss": 4.2754, "step": 1764 }, { "epoch": 0.01765, "grad_norm": 0.7897730382355919, "learning_rate": 0.003, "loss": 4.3076, "step": 1765 }, { "epoch": 0.01766, "grad_norm": 0.7174878978412247, "learning_rate": 0.003, "loss": 4.317, "step": 1766 }, { "epoch": 0.01767, "grad_norm": 0.7171956437495152, "learning_rate": 0.003, "loss": 4.2757, "step": 1767 }, { "epoch": 0.01768, "grad_norm": 0.8077487259776668, "learning_rate": 0.003, "loss": 4.2866, "step": 1768 }, { "epoch": 0.01769, "grad_norm": 0.7100605985326003, "learning_rate": 0.003, "loss": 4.2811, "step": 1769 }, { "epoch": 0.0177, "grad_norm": 0.578964917508603, "learning_rate": 0.003, "loss": 4.2635, "step": 1770 }, { "epoch": 0.01771, "grad_norm": 0.6086271828330341, "learning_rate": 0.003, "loss": 4.2929, "step": 1771 }, { "epoch": 0.01772, "grad_norm": 0.497546957954685, "learning_rate": 0.003, "loss": 4.262, "step": 1772 }, { "epoch": 0.01773, "grad_norm": 0.490787842661778, "learning_rate": 0.003, "loss": 4.2539, "step": 1773 }, { "epoch": 0.01774, "grad_norm": 0.40528808868462435, "learning_rate": 0.003, "loss": 4.272, "step": 1774 }, { "epoch": 0.01775, "grad_norm": 0.3996997018074339, "learning_rate": 0.003, "loss": 4.2574, "step": 1775 }, { "epoch": 0.01776, "grad_norm": 0.40093115740510726, "learning_rate": 0.003, "loss": 4.2491, "step": 1776 }, { "epoch": 0.01777, "grad_norm": 0.4577551703461772, "learning_rate": 0.003, "loss": 4.27, "step": 1777 }, { "epoch": 0.01778, "grad_norm": 0.4894775985846586, "learning_rate": 0.003, "loss": 4.262, "step": 1778 }, { "epoch": 0.01779, "grad_norm": 0.619715296261353, "learning_rate": 0.003, "loss": 4.2821, "step": 1779 }, { "epoch": 0.0178, "grad_norm": 0.8092821046637155, "learning_rate": 0.003, "loss": 4.2576, "step": 1780 }, { "epoch": 0.01781, "grad_norm": 1.007368746628607, "learning_rate": 0.003, "loss": 4.2833, "step": 1781 }, { "epoch": 0.01782, "grad_norm": 1.0605028910459124, "learning_rate": 0.003, "loss": 4.314, "step": 1782 }, { "epoch": 0.01783, "grad_norm": 0.8361916169047299, "learning_rate": 0.003, "loss": 4.2912, "step": 1783 }, { "epoch": 0.01784, "grad_norm": 0.9950086095977337, "learning_rate": 0.003, "loss": 4.2963, "step": 1784 }, { "epoch": 0.01785, "grad_norm": 1.0228877834258285, "learning_rate": 0.003, "loss": 4.2673, "step": 1785 }, { "epoch": 0.01786, "grad_norm": 0.8766088407747356, "learning_rate": 0.003, "loss": 4.2899, "step": 1786 }, { "epoch": 0.01787, "grad_norm": 0.9346024274483619, "learning_rate": 0.003, "loss": 4.2991, "step": 1787 }, { "epoch": 0.01788, "grad_norm": 0.8655394124739175, "learning_rate": 0.003, "loss": 4.3097, "step": 1788 }, { "epoch": 0.01789, "grad_norm": 0.7320722132470949, "learning_rate": 0.003, "loss": 4.2742, "step": 1789 }, { "epoch": 0.0179, "grad_norm": 0.6043898276387406, "learning_rate": 0.003, "loss": 4.288, "step": 1790 }, { "epoch": 0.01791, "grad_norm": 0.551731430114501, "learning_rate": 0.003, "loss": 4.2984, "step": 1791 }, { "epoch": 0.01792, "grad_norm": 0.4116411374695503, "learning_rate": 0.003, "loss": 4.2944, "step": 1792 }, { "epoch": 0.01793, "grad_norm": 0.4368353784765224, "learning_rate": 0.003, "loss": 4.2574, "step": 1793 }, { "epoch": 0.01794, "grad_norm": 0.4134683249639472, "learning_rate": 0.003, "loss": 4.2696, "step": 1794 }, { "epoch": 0.01795, "grad_norm": 0.3898886643379656, "learning_rate": 0.003, "loss": 4.2661, "step": 1795 }, { "epoch": 0.01796, "grad_norm": 0.3690232703108766, "learning_rate": 0.003, "loss": 4.2797, "step": 1796 }, { "epoch": 0.01797, "grad_norm": 0.3352292075992397, "learning_rate": 0.003, "loss": 4.2421, "step": 1797 }, { "epoch": 0.01798, "grad_norm": 0.3577790623133374, "learning_rate": 0.003, "loss": 4.2547, "step": 1798 }, { "epoch": 0.01799, "grad_norm": 0.40125449243674877, "learning_rate": 0.003, "loss": 4.269, "step": 1799 }, { "epoch": 0.018, "grad_norm": 0.4677938601833102, "learning_rate": 0.003, "loss": 4.26, "step": 1800 }, { "epoch": 0.01801, "grad_norm": 0.5610552208097703, "learning_rate": 0.003, "loss": 4.2554, "step": 1801 }, { "epoch": 0.01802, "grad_norm": 0.5864236478232465, "learning_rate": 0.003, "loss": 4.2736, "step": 1802 }, { "epoch": 0.01803, "grad_norm": 0.540429672022444, "learning_rate": 0.003, "loss": 4.2776, "step": 1803 }, { "epoch": 0.01804, "grad_norm": 0.60459180760597, "learning_rate": 0.003, "loss": 4.2503, "step": 1804 }, { "epoch": 0.01805, "grad_norm": 0.6945199909530504, "learning_rate": 0.003, "loss": 4.2727, "step": 1805 }, { "epoch": 0.01806, "grad_norm": 0.6888751318415884, "learning_rate": 0.003, "loss": 4.2338, "step": 1806 }, { "epoch": 0.01807, "grad_norm": 0.652641222784165, "learning_rate": 0.003, "loss": 4.2674, "step": 1807 }, { "epoch": 0.01808, "grad_norm": 0.7163920878946057, "learning_rate": 0.003, "loss": 4.2613, "step": 1808 }, { "epoch": 0.01809, "grad_norm": 0.7231527059402414, "learning_rate": 0.003, "loss": 4.284, "step": 1809 }, { "epoch": 0.0181, "grad_norm": 0.7979758654578836, "learning_rate": 0.003, "loss": 4.2839, "step": 1810 }, { "epoch": 0.01811, "grad_norm": 0.7630566976684018, "learning_rate": 0.003, "loss": 4.2692, "step": 1811 }, { "epoch": 0.01812, "grad_norm": 0.68601985737994, "learning_rate": 0.003, "loss": 4.2545, "step": 1812 }, { "epoch": 0.01813, "grad_norm": 0.601926611394952, "learning_rate": 0.003, "loss": 4.2949, "step": 1813 }, { "epoch": 0.01814, "grad_norm": 0.66852657571521, "learning_rate": 0.003, "loss": 4.2647, "step": 1814 }, { "epoch": 0.01815, "grad_norm": 0.6050905896281803, "learning_rate": 0.003, "loss": 4.2474, "step": 1815 }, { "epoch": 0.01816, "grad_norm": 0.531953848915138, "learning_rate": 0.003, "loss": 4.2731, "step": 1816 }, { "epoch": 0.01817, "grad_norm": 0.6652005043351588, "learning_rate": 0.003, "loss": 4.2676, "step": 1817 }, { "epoch": 0.01818, "grad_norm": 0.6653492631522442, "learning_rate": 0.003, "loss": 4.2669, "step": 1818 }, { "epoch": 0.01819, "grad_norm": 0.6430139858317634, "learning_rate": 0.003, "loss": 4.2522, "step": 1819 }, { "epoch": 0.0182, "grad_norm": 0.7307512465670702, "learning_rate": 0.003, "loss": 4.2731, "step": 1820 }, { "epoch": 0.01821, "grad_norm": 0.7117919312146421, "learning_rate": 0.003, "loss": 4.2608, "step": 1821 }, { "epoch": 0.01822, "grad_norm": 0.6849992334976018, "learning_rate": 0.003, "loss": 4.2737, "step": 1822 }, { "epoch": 0.01823, "grad_norm": 0.6437329965546797, "learning_rate": 0.003, "loss": 4.2861, "step": 1823 }, { "epoch": 0.01824, "grad_norm": 0.6298055587951418, "learning_rate": 0.003, "loss": 4.2663, "step": 1824 }, { "epoch": 0.01825, "grad_norm": 0.6105471946808345, "learning_rate": 0.003, "loss": 4.2706, "step": 1825 }, { "epoch": 0.01826, "grad_norm": 0.5680736127986394, "learning_rate": 0.003, "loss": 4.2798, "step": 1826 }, { "epoch": 0.01827, "grad_norm": 0.6203032072106797, "learning_rate": 0.003, "loss": 4.2493, "step": 1827 }, { "epoch": 0.01828, "grad_norm": 0.5646483418870301, "learning_rate": 0.003, "loss": 4.2595, "step": 1828 }, { "epoch": 0.01829, "grad_norm": 0.43449345506164244, "learning_rate": 0.003, "loss": 4.2724, "step": 1829 }, { "epoch": 0.0183, "grad_norm": 0.4995474978675237, "learning_rate": 0.003, "loss": 4.268, "step": 1830 }, { "epoch": 0.01831, "grad_norm": 0.6731917230470822, "learning_rate": 0.003, "loss": 4.2625, "step": 1831 }, { "epoch": 0.01832, "grad_norm": 1.0735909763241713, "learning_rate": 0.003, "loss": 4.2749, "step": 1832 }, { "epoch": 0.01833, "grad_norm": 0.9454987640162065, "learning_rate": 0.003, "loss": 4.3126, "step": 1833 }, { "epoch": 0.01834, "grad_norm": 0.6279857981331142, "learning_rate": 0.003, "loss": 4.265, "step": 1834 }, { "epoch": 0.01835, "grad_norm": 0.7795333868843384, "learning_rate": 0.003, "loss": 4.2708, "step": 1835 }, { "epoch": 0.01836, "grad_norm": 0.7540135177559686, "learning_rate": 0.003, "loss": 4.269, "step": 1836 }, { "epoch": 0.01837, "grad_norm": 0.4873694552023737, "learning_rate": 0.003, "loss": 4.2594, "step": 1837 }, { "epoch": 0.01838, "grad_norm": 0.6475671331829288, "learning_rate": 0.003, "loss": 4.2679, "step": 1838 }, { "epoch": 0.01839, "grad_norm": 0.5413345448794652, "learning_rate": 0.003, "loss": 4.2849, "step": 1839 }, { "epoch": 0.0184, "grad_norm": 0.505876271981352, "learning_rate": 0.003, "loss": 4.2815, "step": 1840 }, { "epoch": 0.01841, "grad_norm": 0.5559518639320817, "learning_rate": 0.003, "loss": 4.2211, "step": 1841 }, { "epoch": 0.01842, "grad_norm": 0.565402658078461, "learning_rate": 0.003, "loss": 4.2563, "step": 1842 }, { "epoch": 0.01843, "grad_norm": 0.5176031889881334, "learning_rate": 0.003, "loss": 4.2347, "step": 1843 }, { "epoch": 0.01844, "grad_norm": 0.515476660965344, "learning_rate": 0.003, "loss": 4.2336, "step": 1844 }, { "epoch": 0.01845, "grad_norm": 0.537738729997812, "learning_rate": 0.003, "loss": 4.2366, "step": 1845 }, { "epoch": 0.01846, "grad_norm": 0.5825872331164712, "learning_rate": 0.003, "loss": 4.2629, "step": 1846 }, { "epoch": 0.01847, "grad_norm": 0.6172426924095343, "learning_rate": 0.003, "loss": 4.261, "step": 1847 }, { "epoch": 0.01848, "grad_norm": 0.5675520394460739, "learning_rate": 0.003, "loss": 4.2733, "step": 1848 }, { "epoch": 0.01849, "grad_norm": 0.5804129655862207, "learning_rate": 0.003, "loss": 4.2733, "step": 1849 }, { "epoch": 0.0185, "grad_norm": 0.5903509862738283, "learning_rate": 0.003, "loss": 4.2637, "step": 1850 }, { "epoch": 0.01851, "grad_norm": 0.5789415751735032, "learning_rate": 0.003, "loss": 4.2453, "step": 1851 }, { "epoch": 0.01852, "grad_norm": 0.6073114122094332, "learning_rate": 0.003, "loss": 4.249, "step": 1852 }, { "epoch": 0.01853, "grad_norm": 0.6172059367670675, "learning_rate": 0.003, "loss": 4.2514, "step": 1853 }, { "epoch": 0.01854, "grad_norm": 0.5952391168819953, "learning_rate": 0.003, "loss": 4.282, "step": 1854 }, { "epoch": 0.01855, "grad_norm": 0.599929499071102, "learning_rate": 0.003, "loss": 4.2645, "step": 1855 }, { "epoch": 0.01856, "grad_norm": 0.7854590983227943, "learning_rate": 0.003, "loss": 4.2582, "step": 1856 }, { "epoch": 0.01857, "grad_norm": 0.9479235992175757, "learning_rate": 0.003, "loss": 4.2521, "step": 1857 }, { "epoch": 0.01858, "grad_norm": 1.0547185212118042, "learning_rate": 0.003, "loss": 4.2782, "step": 1858 }, { "epoch": 0.01859, "grad_norm": 0.7498301751007965, "learning_rate": 0.003, "loss": 4.259, "step": 1859 }, { "epoch": 0.0186, "grad_norm": 0.6772314759733408, "learning_rate": 0.003, "loss": 4.238, "step": 1860 }, { "epoch": 0.01861, "grad_norm": 0.7554457171283483, "learning_rate": 0.003, "loss": 4.2858, "step": 1861 }, { "epoch": 0.01862, "grad_norm": 0.6601398192741809, "learning_rate": 0.003, "loss": 4.2482, "step": 1862 }, { "epoch": 0.01863, "grad_norm": 0.5632517970928785, "learning_rate": 0.003, "loss": 4.2548, "step": 1863 }, { "epoch": 0.01864, "grad_norm": 0.5518520472207163, "learning_rate": 0.003, "loss": 4.2848, "step": 1864 }, { "epoch": 0.01865, "grad_norm": 0.5191605026436766, "learning_rate": 0.003, "loss": 4.2571, "step": 1865 }, { "epoch": 0.01866, "grad_norm": 0.487977336215745, "learning_rate": 0.003, "loss": 4.2583, "step": 1866 }, { "epoch": 0.01867, "grad_norm": 0.38111750510593084, "learning_rate": 0.003, "loss": 4.2288, "step": 1867 }, { "epoch": 0.01868, "grad_norm": 0.36979378128361096, "learning_rate": 0.003, "loss": 4.2458, "step": 1868 }, { "epoch": 0.01869, "grad_norm": 0.4056719991155462, "learning_rate": 0.003, "loss": 4.259, "step": 1869 }, { "epoch": 0.0187, "grad_norm": 0.511550197815793, "learning_rate": 0.003, "loss": 4.2534, "step": 1870 }, { "epoch": 0.01871, "grad_norm": 0.6908775527772675, "learning_rate": 0.003, "loss": 4.2322, "step": 1871 }, { "epoch": 0.01872, "grad_norm": 0.8655201254185445, "learning_rate": 0.003, "loss": 4.2892, "step": 1872 }, { "epoch": 0.01873, "grad_norm": 0.903028038452321, "learning_rate": 0.003, "loss": 4.2527, "step": 1873 }, { "epoch": 0.01874, "grad_norm": 0.7655189256325052, "learning_rate": 0.003, "loss": 4.2534, "step": 1874 }, { "epoch": 0.01875, "grad_norm": 0.6845228657861865, "learning_rate": 0.003, "loss": 4.2697, "step": 1875 }, { "epoch": 0.01876, "grad_norm": 0.7967028641532641, "learning_rate": 0.003, "loss": 4.2796, "step": 1876 }, { "epoch": 0.01877, "grad_norm": 0.7276633971950639, "learning_rate": 0.003, "loss": 4.2792, "step": 1877 }, { "epoch": 0.01878, "grad_norm": 0.6745755254962531, "learning_rate": 0.003, "loss": 4.2647, "step": 1878 }, { "epoch": 0.01879, "grad_norm": 0.6794948462625433, "learning_rate": 0.003, "loss": 4.2421, "step": 1879 }, { "epoch": 0.0188, "grad_norm": 0.7496570961239313, "learning_rate": 0.003, "loss": 4.2581, "step": 1880 }, { "epoch": 0.01881, "grad_norm": 0.7216177452798419, "learning_rate": 0.003, "loss": 4.2527, "step": 1881 }, { "epoch": 0.01882, "grad_norm": 0.6411166720639763, "learning_rate": 0.003, "loss": 4.2764, "step": 1882 }, { "epoch": 0.01883, "grad_norm": 0.4976595430995637, "learning_rate": 0.003, "loss": 4.2463, "step": 1883 }, { "epoch": 0.01884, "grad_norm": 0.5806395175516162, "learning_rate": 0.003, "loss": 4.2463, "step": 1884 }, { "epoch": 0.01885, "grad_norm": 0.5932999040186268, "learning_rate": 0.003, "loss": 4.2267, "step": 1885 }, { "epoch": 0.01886, "grad_norm": 0.6243294916603833, "learning_rate": 0.003, "loss": 4.2637, "step": 1886 }, { "epoch": 0.01887, "grad_norm": 0.6594473434297762, "learning_rate": 0.003, "loss": 4.2583, "step": 1887 }, { "epoch": 0.01888, "grad_norm": 0.5833874852548948, "learning_rate": 0.003, "loss": 4.2402, "step": 1888 }, { "epoch": 0.01889, "grad_norm": 0.5554783687186964, "learning_rate": 0.003, "loss": 4.2547, "step": 1889 }, { "epoch": 0.0189, "grad_norm": 0.5267765258339018, "learning_rate": 0.003, "loss": 4.2329, "step": 1890 }, { "epoch": 0.01891, "grad_norm": 0.5533738473996538, "learning_rate": 0.003, "loss": 4.2819, "step": 1891 }, { "epoch": 0.01892, "grad_norm": 0.5233002886731548, "learning_rate": 0.003, "loss": 4.2568, "step": 1892 }, { "epoch": 0.01893, "grad_norm": 0.598656523235247, "learning_rate": 0.003, "loss": 4.2415, "step": 1893 }, { "epoch": 0.01894, "grad_norm": 0.6329032192123055, "learning_rate": 0.003, "loss": 4.2498, "step": 1894 }, { "epoch": 0.01895, "grad_norm": 0.7448049826558554, "learning_rate": 0.003, "loss": 4.2404, "step": 1895 }, { "epoch": 0.01896, "grad_norm": 0.7896808634621466, "learning_rate": 0.003, "loss": 4.2666, "step": 1896 }, { "epoch": 0.01897, "grad_norm": 0.8670691506268475, "learning_rate": 0.003, "loss": 4.267, "step": 1897 }, { "epoch": 0.01898, "grad_norm": 0.7564766735493696, "learning_rate": 0.003, "loss": 4.2371, "step": 1898 }, { "epoch": 0.01899, "grad_norm": 0.6014591021089313, "learning_rate": 0.003, "loss": 4.246, "step": 1899 }, { "epoch": 0.019, "grad_norm": 0.6487238549786433, "learning_rate": 0.003, "loss": 4.2427, "step": 1900 }, { "epoch": 0.01901, "grad_norm": 0.7335622998657767, "learning_rate": 0.003, "loss": 4.2939, "step": 1901 }, { "epoch": 0.01902, "grad_norm": 0.727489215463111, "learning_rate": 0.003, "loss": 4.2706, "step": 1902 }, { "epoch": 0.01903, "grad_norm": 0.6444511274857245, "learning_rate": 0.003, "loss": 4.2217, "step": 1903 }, { "epoch": 0.01904, "grad_norm": 0.5669107570524424, "learning_rate": 0.003, "loss": 4.2738, "step": 1904 }, { "epoch": 0.01905, "grad_norm": 0.47567496733321213, "learning_rate": 0.003, "loss": 4.2471, "step": 1905 }, { "epoch": 0.01906, "grad_norm": 0.568871434515019, "learning_rate": 0.003, "loss": 4.2665, "step": 1906 }, { "epoch": 0.01907, "grad_norm": 0.6437840413516579, "learning_rate": 0.003, "loss": 4.2605, "step": 1907 }, { "epoch": 0.01908, "grad_norm": 0.8102395542085408, "learning_rate": 0.003, "loss": 4.2806, "step": 1908 }, { "epoch": 0.01909, "grad_norm": 0.9019612301377007, "learning_rate": 0.003, "loss": 4.2522, "step": 1909 }, { "epoch": 0.0191, "grad_norm": 0.8060528165222425, "learning_rate": 0.003, "loss": 4.2682, "step": 1910 }, { "epoch": 0.01911, "grad_norm": 0.6514562643306322, "learning_rate": 0.003, "loss": 4.2341, "step": 1911 }, { "epoch": 0.01912, "grad_norm": 0.7512556786074466, "learning_rate": 0.003, "loss": 4.2481, "step": 1912 }, { "epoch": 0.01913, "grad_norm": 0.7206930768558686, "learning_rate": 0.003, "loss": 4.2688, "step": 1913 }, { "epoch": 0.01914, "grad_norm": 0.6802559754696879, "learning_rate": 0.003, "loss": 4.2815, "step": 1914 }, { "epoch": 0.01915, "grad_norm": 0.6329709283705148, "learning_rate": 0.003, "loss": 4.2674, "step": 1915 }, { "epoch": 0.01916, "grad_norm": 0.6539184714683666, "learning_rate": 0.003, "loss": 4.2462, "step": 1916 }, { "epoch": 0.01917, "grad_norm": 0.710899415708157, "learning_rate": 0.003, "loss": 4.2674, "step": 1917 }, { "epoch": 0.01918, "grad_norm": 0.7167946113405885, "learning_rate": 0.003, "loss": 4.2562, "step": 1918 }, { "epoch": 0.01919, "grad_norm": 0.7533071012807427, "learning_rate": 0.003, "loss": 4.2594, "step": 1919 }, { "epoch": 0.0192, "grad_norm": 0.6703396884694851, "learning_rate": 0.003, "loss": 4.2612, "step": 1920 }, { "epoch": 0.01921, "grad_norm": 0.5834644623189628, "learning_rate": 0.003, "loss": 4.2441, "step": 1921 }, { "epoch": 0.01922, "grad_norm": 0.6481277683352221, "learning_rate": 0.003, "loss": 4.2625, "step": 1922 }, { "epoch": 0.01923, "grad_norm": 0.6044219599617415, "learning_rate": 0.003, "loss": 4.2661, "step": 1923 }, { "epoch": 0.01924, "grad_norm": 0.5337323934986105, "learning_rate": 0.003, "loss": 4.2643, "step": 1924 }, { "epoch": 0.01925, "grad_norm": 0.5129044588459396, "learning_rate": 0.003, "loss": 4.251, "step": 1925 }, { "epoch": 0.01926, "grad_norm": 0.48201171188935904, "learning_rate": 0.003, "loss": 4.2563, "step": 1926 }, { "epoch": 0.01927, "grad_norm": 0.4295757083815179, "learning_rate": 0.003, "loss": 4.2456, "step": 1927 }, { "epoch": 0.01928, "grad_norm": 0.5221994128637466, "learning_rate": 0.003, "loss": 4.2241, "step": 1928 }, { "epoch": 0.01929, "grad_norm": 0.6151786259894082, "learning_rate": 0.003, "loss": 4.246, "step": 1929 }, { "epoch": 0.0193, "grad_norm": 0.717185888834206, "learning_rate": 0.003, "loss": 4.2399, "step": 1930 }, { "epoch": 0.01931, "grad_norm": 0.865419932280181, "learning_rate": 0.003, "loss": 4.2485, "step": 1931 }, { "epoch": 0.01932, "grad_norm": 1.015142018105296, "learning_rate": 0.003, "loss": 4.2679, "step": 1932 }, { "epoch": 0.01933, "grad_norm": 1.0909531598947217, "learning_rate": 0.003, "loss": 4.2582, "step": 1933 }, { "epoch": 0.01934, "grad_norm": 0.7995257446738059, "learning_rate": 0.003, "loss": 4.2554, "step": 1934 }, { "epoch": 0.01935, "grad_norm": 0.6434815333532291, "learning_rate": 0.003, "loss": 4.2669, "step": 1935 }, { "epoch": 0.01936, "grad_norm": 0.6569925784458591, "learning_rate": 0.003, "loss": 4.2489, "step": 1936 }, { "epoch": 0.01937, "grad_norm": 0.5996128457789688, "learning_rate": 0.003, "loss": 4.2577, "step": 1937 }, { "epoch": 0.01938, "grad_norm": 0.6329170311784792, "learning_rate": 0.003, "loss": 4.2615, "step": 1938 }, { "epoch": 0.01939, "grad_norm": 0.6354665323417178, "learning_rate": 0.003, "loss": 4.2808, "step": 1939 }, { "epoch": 0.0194, "grad_norm": 0.6008328437037136, "learning_rate": 0.003, "loss": 4.2581, "step": 1940 }, { "epoch": 0.01941, "grad_norm": 0.5343827591468221, "learning_rate": 0.003, "loss": 4.2378, "step": 1941 }, { "epoch": 0.01942, "grad_norm": 0.5512416106954112, "learning_rate": 0.003, "loss": 4.2611, "step": 1942 }, { "epoch": 0.01943, "grad_norm": 0.5866960320410021, "learning_rate": 0.003, "loss": 4.2341, "step": 1943 }, { "epoch": 0.01944, "grad_norm": 0.6240489288808161, "learning_rate": 0.003, "loss": 4.2751, "step": 1944 }, { "epoch": 0.01945, "grad_norm": 0.6243606216006328, "learning_rate": 0.003, "loss": 4.2437, "step": 1945 }, { "epoch": 0.01946, "grad_norm": 0.5880226151273782, "learning_rate": 0.003, "loss": 4.2379, "step": 1946 }, { "epoch": 0.01947, "grad_norm": 0.5934159043950513, "learning_rate": 0.003, "loss": 4.2471, "step": 1947 }, { "epoch": 0.01948, "grad_norm": 0.5378813319639308, "learning_rate": 0.003, "loss": 4.2659, "step": 1948 }, { "epoch": 0.01949, "grad_norm": 0.5777955568924656, "learning_rate": 0.003, "loss": 4.2595, "step": 1949 }, { "epoch": 0.0195, "grad_norm": 0.6588236649280669, "learning_rate": 0.003, "loss": 4.2493, "step": 1950 }, { "epoch": 0.01951, "grad_norm": 0.6662863362190039, "learning_rate": 0.003, "loss": 4.2457, "step": 1951 }, { "epoch": 0.01952, "grad_norm": 0.6956758803553106, "learning_rate": 0.003, "loss": 4.2563, "step": 1952 }, { "epoch": 0.01953, "grad_norm": 0.7309670323591311, "learning_rate": 0.003, "loss": 4.2663, "step": 1953 }, { "epoch": 0.01954, "grad_norm": 0.7102958436956044, "learning_rate": 0.003, "loss": 4.2656, "step": 1954 }, { "epoch": 0.01955, "grad_norm": 0.748927101392194, "learning_rate": 0.003, "loss": 4.2596, "step": 1955 }, { "epoch": 0.01956, "grad_norm": 0.6357959051502906, "learning_rate": 0.003, "loss": 4.2507, "step": 1956 }, { "epoch": 0.01957, "grad_norm": 0.5776415679220375, "learning_rate": 0.003, "loss": 4.2172, "step": 1957 }, { "epoch": 0.01958, "grad_norm": 0.5010308008271186, "learning_rate": 0.003, "loss": 4.2108, "step": 1958 }, { "epoch": 0.01959, "grad_norm": 0.500419502823399, "learning_rate": 0.003, "loss": 4.2342, "step": 1959 }, { "epoch": 0.0196, "grad_norm": 0.5017850552651191, "learning_rate": 0.003, "loss": 4.229, "step": 1960 }, { "epoch": 0.01961, "grad_norm": 0.49043553705552145, "learning_rate": 0.003, "loss": 4.2267, "step": 1961 }, { "epoch": 0.01962, "grad_norm": 0.49806058358446914, "learning_rate": 0.003, "loss": 4.2301, "step": 1962 }, { "epoch": 0.01963, "grad_norm": 0.5386284819858854, "learning_rate": 0.003, "loss": 4.2201, "step": 1963 }, { "epoch": 0.01964, "grad_norm": 0.6462166130085853, "learning_rate": 0.003, "loss": 4.2492, "step": 1964 }, { "epoch": 0.01965, "grad_norm": 0.8617584950578717, "learning_rate": 0.003, "loss": 4.2512, "step": 1965 }, { "epoch": 0.01966, "grad_norm": 1.064420589029421, "learning_rate": 0.003, "loss": 4.2624, "step": 1966 }, { "epoch": 0.01967, "grad_norm": 0.8064456905773513, "learning_rate": 0.003, "loss": 4.2378, "step": 1967 }, { "epoch": 0.01968, "grad_norm": 0.6120027249682956, "learning_rate": 0.003, "loss": 4.2516, "step": 1968 }, { "epoch": 0.01969, "grad_norm": 0.7508122971760963, "learning_rate": 0.003, "loss": 4.2526, "step": 1969 }, { "epoch": 0.0197, "grad_norm": 0.7405449826366749, "learning_rate": 0.003, "loss": 4.2514, "step": 1970 }, { "epoch": 0.01971, "grad_norm": 0.7056278637839583, "learning_rate": 0.003, "loss": 4.2377, "step": 1971 }, { "epoch": 0.01972, "grad_norm": 0.7105683738164339, "learning_rate": 0.003, "loss": 4.2407, "step": 1972 }, { "epoch": 0.01973, "grad_norm": 0.7508003521914575, "learning_rate": 0.003, "loss": 4.2264, "step": 1973 }, { "epoch": 0.01974, "grad_norm": 0.7061088132196116, "learning_rate": 0.003, "loss": 4.2641, "step": 1974 }, { "epoch": 0.01975, "grad_norm": 0.6140071978685933, "learning_rate": 0.003, "loss": 4.2539, "step": 1975 }, { "epoch": 0.01976, "grad_norm": 0.6214170582947839, "learning_rate": 0.003, "loss": 4.243, "step": 1976 }, { "epoch": 0.01977, "grad_norm": 0.6032091701282252, "learning_rate": 0.003, "loss": 4.2759, "step": 1977 }, { "epoch": 0.01978, "grad_norm": 0.6890579485446906, "learning_rate": 0.003, "loss": 4.2782, "step": 1978 }, { "epoch": 0.01979, "grad_norm": 0.7078287139411981, "learning_rate": 0.003, "loss": 4.269, "step": 1979 }, { "epoch": 0.0198, "grad_norm": 0.726915367409026, "learning_rate": 0.003, "loss": 4.2503, "step": 1980 }, { "epoch": 0.01981, "grad_norm": 0.7277727772925232, "learning_rate": 0.003, "loss": 4.2394, "step": 1981 }, { "epoch": 0.01982, "grad_norm": 0.6618110470758236, "learning_rate": 0.003, "loss": 4.262, "step": 1982 }, { "epoch": 0.01983, "grad_norm": 0.6282900232413579, "learning_rate": 0.003, "loss": 4.2497, "step": 1983 }, { "epoch": 0.01984, "grad_norm": 0.6651788332059134, "learning_rate": 0.003, "loss": 4.2635, "step": 1984 }, { "epoch": 0.01985, "grad_norm": 0.6195263920647736, "learning_rate": 0.003, "loss": 4.2484, "step": 1985 }, { "epoch": 0.01986, "grad_norm": 0.6562857171469267, "learning_rate": 0.003, "loss": 4.2792, "step": 1986 }, { "epoch": 0.01987, "grad_norm": 0.6274806569391715, "learning_rate": 0.003, "loss": 4.2331, "step": 1987 }, { "epoch": 0.01988, "grad_norm": 0.6627977107268054, "learning_rate": 0.003, "loss": 4.2466, "step": 1988 }, { "epoch": 0.01989, "grad_norm": 0.6286439849240877, "learning_rate": 0.003, "loss": 4.2344, "step": 1989 }, { "epoch": 0.0199, "grad_norm": 0.5786457654364954, "learning_rate": 0.003, "loss": 4.246, "step": 1990 }, { "epoch": 0.01991, "grad_norm": 0.5167049375276446, "learning_rate": 0.003, "loss": 4.2238, "step": 1991 }, { "epoch": 0.01992, "grad_norm": 0.50421095170271, "learning_rate": 0.003, "loss": 4.261, "step": 1992 }, { "epoch": 0.01993, "grad_norm": 0.4699320041008658, "learning_rate": 0.003, "loss": 4.2447, "step": 1993 }, { "epoch": 0.01994, "grad_norm": 0.5186369446769569, "learning_rate": 0.003, "loss": 4.2629, "step": 1994 }, { "epoch": 0.01995, "grad_norm": 0.5867398105885127, "learning_rate": 0.003, "loss": 4.2547, "step": 1995 }, { "epoch": 0.01996, "grad_norm": 0.8031924386202848, "learning_rate": 0.003, "loss": 4.2326, "step": 1996 }, { "epoch": 0.01997, "grad_norm": 0.9756949043315064, "learning_rate": 0.003, "loss": 4.2746, "step": 1997 }, { "epoch": 0.01998, "grad_norm": 0.8340147112264455, "learning_rate": 0.003, "loss": 4.2546, "step": 1998 }, { "epoch": 0.01999, "grad_norm": 0.6652469206537822, "learning_rate": 0.003, "loss": 4.2393, "step": 1999 }, { "epoch": 0.02, "grad_norm": 0.7695645348180246, "learning_rate": 0.003, "loss": 4.2318, "step": 2000 }, { "epoch": 0.02001, "grad_norm": 0.6865633733578148, "learning_rate": 0.003, "loss": 4.262, "step": 2001 }, { "epoch": 0.02002, "grad_norm": 0.7345949774088514, "learning_rate": 0.003, "loss": 4.2618, "step": 2002 }, { "epoch": 0.02003, "grad_norm": 0.5984396930779281, "learning_rate": 0.003, "loss": 4.2543, "step": 2003 }, { "epoch": 0.02004, "grad_norm": 0.5305961077422008, "learning_rate": 0.003, "loss": 4.2408, "step": 2004 }, { "epoch": 0.02005, "grad_norm": 0.48452673426344656, "learning_rate": 0.003, "loss": 4.2431, "step": 2005 }, { "epoch": 0.02006, "grad_norm": 0.5689487878863901, "learning_rate": 0.003, "loss": 4.2442, "step": 2006 }, { "epoch": 0.02007, "grad_norm": 0.5838792869042901, "learning_rate": 0.003, "loss": 4.2584, "step": 2007 }, { "epoch": 0.02008, "grad_norm": 0.6291452839633271, "learning_rate": 0.003, "loss": 4.2506, "step": 2008 }, { "epoch": 0.02009, "grad_norm": 0.6903355213664546, "learning_rate": 0.003, "loss": 4.2495, "step": 2009 }, { "epoch": 0.0201, "grad_norm": 0.6289287565263567, "learning_rate": 0.003, "loss": 4.2607, "step": 2010 }, { "epoch": 0.02011, "grad_norm": 0.5984818046752132, "learning_rate": 0.003, "loss": 4.2452, "step": 2011 }, { "epoch": 0.02012, "grad_norm": 0.6682118694640922, "learning_rate": 0.003, "loss": 4.2452, "step": 2012 }, { "epoch": 0.02013, "grad_norm": 0.7050046827190253, "learning_rate": 0.003, "loss": 4.2532, "step": 2013 }, { "epoch": 0.02014, "grad_norm": 0.6761323120848991, "learning_rate": 0.003, "loss": 4.2415, "step": 2014 }, { "epoch": 0.02015, "grad_norm": 0.6372907183843184, "learning_rate": 0.003, "loss": 4.2336, "step": 2015 }, { "epoch": 0.02016, "grad_norm": 0.5972921997645952, "learning_rate": 0.003, "loss": 4.2404, "step": 2016 }, { "epoch": 0.02017, "grad_norm": 0.49149348634303763, "learning_rate": 0.003, "loss": 4.2312, "step": 2017 }, { "epoch": 0.02018, "grad_norm": 0.6030678218331625, "learning_rate": 0.003, "loss": 4.2371, "step": 2018 }, { "epoch": 0.02019, "grad_norm": 0.6395667829535132, "learning_rate": 0.003, "loss": 4.2431, "step": 2019 }, { "epoch": 0.0202, "grad_norm": 0.6210152394675877, "learning_rate": 0.003, "loss": 4.2512, "step": 2020 }, { "epoch": 0.02021, "grad_norm": 0.6950283205655802, "learning_rate": 0.003, "loss": 4.268, "step": 2021 }, { "epoch": 0.02022, "grad_norm": 0.8101179933347831, "learning_rate": 0.003, "loss": 4.2328, "step": 2022 }, { "epoch": 0.02023, "grad_norm": 0.8379556185876997, "learning_rate": 0.003, "loss": 4.2128, "step": 2023 }, { "epoch": 0.02024, "grad_norm": 0.8247606718710992, "learning_rate": 0.003, "loss": 4.2373, "step": 2024 }, { "epoch": 0.02025, "grad_norm": 0.8736161526776162, "learning_rate": 0.003, "loss": 4.291, "step": 2025 }, { "epoch": 0.02026, "grad_norm": 0.8472496904448235, "learning_rate": 0.003, "loss": 4.2727, "step": 2026 }, { "epoch": 0.02027, "grad_norm": 0.8056170331271387, "learning_rate": 0.003, "loss": 4.271, "step": 2027 }, { "epoch": 0.02028, "grad_norm": 0.8498369070927115, "learning_rate": 0.003, "loss": 4.2569, "step": 2028 }, { "epoch": 0.02029, "grad_norm": 0.9047394863120564, "learning_rate": 0.003, "loss": 4.2658, "step": 2029 }, { "epoch": 0.0203, "grad_norm": 1.0823713408101607, "learning_rate": 0.003, "loss": 4.2684, "step": 2030 }, { "epoch": 0.02031, "grad_norm": 0.9309326946376139, "learning_rate": 0.003, "loss": 4.2732, "step": 2031 }, { "epoch": 0.02032, "grad_norm": 0.9146514324992777, "learning_rate": 0.003, "loss": 4.2447, "step": 2032 }, { "epoch": 0.02033, "grad_norm": 0.8591399303672403, "learning_rate": 0.003, "loss": 4.2848, "step": 2033 }, { "epoch": 0.02034, "grad_norm": 0.8501591519538507, "learning_rate": 0.003, "loss": 4.2786, "step": 2034 }, { "epoch": 0.02035, "grad_norm": 0.8000776065354223, "learning_rate": 0.003, "loss": 4.2613, "step": 2035 }, { "epoch": 0.02036, "grad_norm": 0.7789847842987989, "learning_rate": 0.003, "loss": 4.2629, "step": 2036 }, { "epoch": 0.02037, "grad_norm": 0.8301107777503408, "learning_rate": 0.003, "loss": 4.2828, "step": 2037 }, { "epoch": 0.02038, "grad_norm": 0.8246182480374772, "learning_rate": 0.003, "loss": 4.2649, "step": 2038 }, { "epoch": 0.02039, "grad_norm": 0.8433738197183226, "learning_rate": 0.003, "loss": 4.2745, "step": 2039 }, { "epoch": 0.0204, "grad_norm": 0.7922524759276299, "learning_rate": 0.003, "loss": 4.2766, "step": 2040 }, { "epoch": 0.02041, "grad_norm": 0.7214169410748629, "learning_rate": 0.003, "loss": 4.2381, "step": 2041 }, { "epoch": 0.02042, "grad_norm": 0.5980473954078982, "learning_rate": 0.003, "loss": 4.2311, "step": 2042 }, { "epoch": 0.02043, "grad_norm": 0.6544292652761489, "learning_rate": 0.003, "loss": 4.2765, "step": 2043 }, { "epoch": 0.02044, "grad_norm": 0.6815263020074647, "learning_rate": 0.003, "loss": 4.2533, "step": 2044 }, { "epoch": 0.02045, "grad_norm": 0.6716783756563022, "learning_rate": 0.003, "loss": 4.266, "step": 2045 }, { "epoch": 0.02046, "grad_norm": 0.5685869111952888, "learning_rate": 0.003, "loss": 4.2364, "step": 2046 }, { "epoch": 0.02047, "grad_norm": 0.539068698157252, "learning_rate": 0.003, "loss": 4.2693, "step": 2047 }, { "epoch": 0.02048, "grad_norm": 0.5462552506791943, "learning_rate": 0.003, "loss": 4.2521, "step": 2048 }, { "epoch": 0.02049, "grad_norm": 0.5078072105462605, "learning_rate": 0.003, "loss": 4.2715, "step": 2049 }, { "epoch": 0.0205, "grad_norm": 0.5740623442922046, "learning_rate": 0.003, "loss": 4.2205, "step": 2050 }, { "epoch": 0.02051, "grad_norm": 0.631335957620747, "learning_rate": 0.003, "loss": 4.2224, "step": 2051 }, { "epoch": 0.02052, "grad_norm": 0.6486110208729147, "learning_rate": 0.003, "loss": 4.2258, "step": 2052 }, { "epoch": 0.02053, "grad_norm": 0.5888200401710874, "learning_rate": 0.003, "loss": 4.2235, "step": 2053 }, { "epoch": 0.02054, "grad_norm": 0.5515462434036196, "learning_rate": 0.003, "loss": 4.2611, "step": 2054 }, { "epoch": 0.02055, "grad_norm": 0.4722653472823476, "learning_rate": 0.003, "loss": 4.2178, "step": 2055 }, { "epoch": 0.02056, "grad_norm": 0.44396394612077134, "learning_rate": 0.003, "loss": 4.2149, "step": 2056 }, { "epoch": 0.02057, "grad_norm": 0.4548184991267254, "learning_rate": 0.003, "loss": 4.2514, "step": 2057 }, { "epoch": 0.02058, "grad_norm": 0.5196145685763705, "learning_rate": 0.003, "loss": 4.2286, "step": 2058 }, { "epoch": 0.02059, "grad_norm": 0.5150404447757911, "learning_rate": 0.003, "loss": 4.2089, "step": 2059 }, { "epoch": 0.0206, "grad_norm": 0.44383213075680966, "learning_rate": 0.003, "loss": 4.251, "step": 2060 }, { "epoch": 0.02061, "grad_norm": 0.37383563921339574, "learning_rate": 0.003, "loss": 4.2222, "step": 2061 }, { "epoch": 0.02062, "grad_norm": 0.35393934819548667, "learning_rate": 0.003, "loss": 4.221, "step": 2062 }, { "epoch": 0.02063, "grad_norm": 0.3267679809833493, "learning_rate": 0.003, "loss": 4.2165, "step": 2063 }, { "epoch": 0.02064, "grad_norm": 0.34033928546810105, "learning_rate": 0.003, "loss": 4.2259, "step": 2064 }, { "epoch": 0.02065, "grad_norm": 0.35466259038416126, "learning_rate": 0.003, "loss": 4.2059, "step": 2065 }, { "epoch": 0.02066, "grad_norm": 0.3016586113305969, "learning_rate": 0.003, "loss": 4.2224, "step": 2066 }, { "epoch": 0.02067, "grad_norm": 0.3498296991074057, "learning_rate": 0.003, "loss": 4.22, "step": 2067 }, { "epoch": 0.02068, "grad_norm": 0.3676321022781276, "learning_rate": 0.003, "loss": 4.2017, "step": 2068 }, { "epoch": 0.02069, "grad_norm": 0.40962533052122363, "learning_rate": 0.003, "loss": 4.2244, "step": 2069 }, { "epoch": 0.0207, "grad_norm": 0.5995962395694695, "learning_rate": 0.003, "loss": 4.2023, "step": 2070 }, { "epoch": 0.02071, "grad_norm": 1.0281554874982604, "learning_rate": 0.003, "loss": 4.2499, "step": 2071 }, { "epoch": 0.02072, "grad_norm": 1.2174002009617175, "learning_rate": 0.003, "loss": 4.2817, "step": 2072 }, { "epoch": 0.02073, "grad_norm": 0.5766208529721838, "learning_rate": 0.003, "loss": 4.2339, "step": 2073 }, { "epoch": 0.02074, "grad_norm": 0.8482120839475051, "learning_rate": 0.003, "loss": 4.2516, "step": 2074 }, { "epoch": 0.02075, "grad_norm": 0.914249694296833, "learning_rate": 0.003, "loss": 4.2873, "step": 2075 }, { "epoch": 0.02076, "grad_norm": 1.0073872264521357, "learning_rate": 0.003, "loss": 4.2898, "step": 2076 }, { "epoch": 0.02077, "grad_norm": 0.9613925913617914, "learning_rate": 0.003, "loss": 4.2576, "step": 2077 }, { "epoch": 0.02078, "grad_norm": 1.0251025412787642, "learning_rate": 0.003, "loss": 4.2665, "step": 2078 }, { "epoch": 0.02079, "grad_norm": 0.8112389212571772, "learning_rate": 0.003, "loss": 4.243, "step": 2079 }, { "epoch": 0.0208, "grad_norm": 0.9069389526132752, "learning_rate": 0.003, "loss": 4.2693, "step": 2080 }, { "epoch": 0.02081, "grad_norm": 1.1627691288871134, "learning_rate": 0.003, "loss": 4.3161, "step": 2081 }, { "epoch": 0.02082, "grad_norm": 1.0285610491992647, "learning_rate": 0.003, "loss": 4.3126, "step": 2082 }, { "epoch": 0.02083, "grad_norm": 1.0850701155180529, "learning_rate": 0.003, "loss": 4.3091, "step": 2083 }, { "epoch": 0.02084, "grad_norm": 0.9461237384614024, "learning_rate": 0.003, "loss": 4.2987, "step": 2084 }, { "epoch": 0.02085, "grad_norm": 0.976394782581613, "learning_rate": 0.003, "loss": 4.284, "step": 2085 }, { "epoch": 0.02086, "grad_norm": 0.82502607345478, "learning_rate": 0.003, "loss": 4.2825, "step": 2086 }, { "epoch": 0.02087, "grad_norm": 0.6641978639980478, "learning_rate": 0.003, "loss": 4.2762, "step": 2087 }, { "epoch": 0.02088, "grad_norm": 0.5823960320780495, "learning_rate": 0.003, "loss": 4.3019, "step": 2088 }, { "epoch": 0.02089, "grad_norm": 0.6056623050213609, "learning_rate": 0.003, "loss": 4.2696, "step": 2089 }, { "epoch": 0.0209, "grad_norm": 0.6222015176775844, "learning_rate": 0.003, "loss": 4.2845, "step": 2090 }, { "epoch": 0.02091, "grad_norm": 0.8107138314533802, "learning_rate": 0.003, "loss": 4.275, "step": 2091 }, { "epoch": 0.02092, "grad_norm": 0.9481339301239347, "learning_rate": 0.003, "loss": 4.2799, "step": 2092 }, { "epoch": 0.02093, "grad_norm": 0.8740047196660798, "learning_rate": 0.003, "loss": 4.2456, "step": 2093 }, { "epoch": 0.02094, "grad_norm": 0.6305842273976254, "learning_rate": 0.003, "loss": 4.2436, "step": 2094 }, { "epoch": 0.02095, "grad_norm": 0.5189901063960811, "learning_rate": 0.003, "loss": 4.2773, "step": 2095 }, { "epoch": 0.02096, "grad_norm": 0.7024275821596554, "learning_rate": 0.003, "loss": 4.2741, "step": 2096 }, { "epoch": 0.02097, "grad_norm": 0.6837847162193671, "learning_rate": 0.003, "loss": 4.2518, "step": 2097 }, { "epoch": 0.02098, "grad_norm": 0.6707932407749881, "learning_rate": 0.003, "loss": 4.2705, "step": 2098 }, { "epoch": 0.02099, "grad_norm": 0.5110579033218214, "learning_rate": 0.003, "loss": 4.2724, "step": 2099 }, { "epoch": 0.021, "grad_norm": 0.4612182402663896, "learning_rate": 0.003, "loss": 4.2819, "step": 2100 }, { "epoch": 0.02101, "grad_norm": 0.45331179352856693, "learning_rate": 0.003, "loss": 4.2684, "step": 2101 }, { "epoch": 0.02102, "grad_norm": 0.42546683266841523, "learning_rate": 0.003, "loss": 4.271, "step": 2102 }, { "epoch": 0.02103, "grad_norm": 0.3547519868664197, "learning_rate": 0.003, "loss": 4.2273, "step": 2103 }, { "epoch": 0.02104, "grad_norm": 0.3900500431869311, "learning_rate": 0.003, "loss": 4.2306, "step": 2104 }, { "epoch": 0.02105, "grad_norm": 0.4122345047469997, "learning_rate": 0.003, "loss": 4.2329, "step": 2105 }, { "epoch": 0.02106, "grad_norm": 0.4185113160695593, "learning_rate": 0.003, "loss": 4.2247, "step": 2106 }, { "epoch": 0.02107, "grad_norm": 0.3925543469529075, "learning_rate": 0.003, "loss": 4.2367, "step": 2107 }, { "epoch": 0.02108, "grad_norm": 0.33327126218953085, "learning_rate": 0.003, "loss": 4.2153, "step": 2108 }, { "epoch": 0.02109, "grad_norm": 0.3153436836891661, "learning_rate": 0.003, "loss": 4.2035, "step": 2109 }, { "epoch": 0.0211, "grad_norm": 0.35294607433058894, "learning_rate": 0.003, "loss": 4.2193, "step": 2110 }, { "epoch": 0.02111, "grad_norm": 0.37843249197178286, "learning_rate": 0.003, "loss": 4.2278, "step": 2111 }, { "epoch": 0.02112, "grad_norm": 0.38829107411949254, "learning_rate": 0.003, "loss": 4.226, "step": 2112 }, { "epoch": 0.02113, "grad_norm": 0.36727891938554497, "learning_rate": 0.003, "loss": 4.2209, "step": 2113 }, { "epoch": 0.02114, "grad_norm": 0.3796589384288701, "learning_rate": 0.003, "loss": 4.2115, "step": 2114 }, { "epoch": 0.02115, "grad_norm": 0.49933195549505976, "learning_rate": 0.003, "loss": 4.2045, "step": 2115 }, { "epoch": 0.02116, "grad_norm": 0.7078427672787017, "learning_rate": 0.003, "loss": 4.2392, "step": 2116 }, { "epoch": 0.02117, "grad_norm": 0.9422169918121244, "learning_rate": 0.003, "loss": 4.2414, "step": 2117 }, { "epoch": 0.02118, "grad_norm": 0.8924259465708047, "learning_rate": 0.003, "loss": 4.2414, "step": 2118 }, { "epoch": 0.02119, "grad_norm": 0.5681560145798641, "learning_rate": 0.003, "loss": 4.2397, "step": 2119 }, { "epoch": 0.0212, "grad_norm": 0.5070503337124914, "learning_rate": 0.003, "loss": 4.213, "step": 2120 }, { "epoch": 0.02121, "grad_norm": 0.5980827300624205, "learning_rate": 0.003, "loss": 4.2118, "step": 2121 }, { "epoch": 0.02122, "grad_norm": 0.48859129709793475, "learning_rate": 0.003, "loss": 4.2167, "step": 2122 }, { "epoch": 0.02123, "grad_norm": 0.463982076261229, "learning_rate": 0.003, "loss": 4.1963, "step": 2123 }, { "epoch": 0.02124, "grad_norm": 0.44089509619617673, "learning_rate": 0.003, "loss": 4.2174, "step": 2124 }, { "epoch": 0.02125, "grad_norm": 0.48261858431331583, "learning_rate": 0.003, "loss": 4.2332, "step": 2125 }, { "epoch": 0.02126, "grad_norm": 0.5099262151069514, "learning_rate": 0.003, "loss": 4.2302, "step": 2126 }, { "epoch": 0.02127, "grad_norm": 0.5196898928719708, "learning_rate": 0.003, "loss": 4.2379, "step": 2127 }, { "epoch": 0.02128, "grad_norm": 0.5656439270875484, "learning_rate": 0.003, "loss": 4.2314, "step": 2128 }, { "epoch": 0.02129, "grad_norm": 0.5221861829481255, "learning_rate": 0.003, "loss": 4.2297, "step": 2129 }, { "epoch": 0.0213, "grad_norm": 0.4521057397115596, "learning_rate": 0.003, "loss": 4.2027, "step": 2130 }, { "epoch": 0.02131, "grad_norm": 0.45364673608644235, "learning_rate": 0.003, "loss": 4.2268, "step": 2131 }, { "epoch": 0.02132, "grad_norm": 0.519443445123456, "learning_rate": 0.003, "loss": 4.2417, "step": 2132 }, { "epoch": 0.02133, "grad_norm": 0.7304407810627219, "learning_rate": 0.003, "loss": 4.2391, "step": 2133 }, { "epoch": 0.02134, "grad_norm": 0.9541768215835498, "learning_rate": 0.003, "loss": 4.2522, "step": 2134 }, { "epoch": 0.02135, "grad_norm": 1.0307643830076532, "learning_rate": 0.003, "loss": 4.2588, "step": 2135 }, { "epoch": 0.02136, "grad_norm": 0.997681208751024, "learning_rate": 0.003, "loss": 4.2363, "step": 2136 }, { "epoch": 0.02137, "grad_norm": 0.8370170695212052, "learning_rate": 0.003, "loss": 4.2696, "step": 2137 }, { "epoch": 0.02138, "grad_norm": 0.9176192838894591, "learning_rate": 0.003, "loss": 4.2858, "step": 2138 }, { "epoch": 0.02139, "grad_norm": 0.9058155406944488, "learning_rate": 0.003, "loss": 4.2833, "step": 2139 }, { "epoch": 0.0214, "grad_norm": 0.8679543052073141, "learning_rate": 0.003, "loss": 4.2707, "step": 2140 }, { "epoch": 0.02141, "grad_norm": 0.7817830154220525, "learning_rate": 0.003, "loss": 4.2527, "step": 2141 }, { "epoch": 0.02142, "grad_norm": 0.798437758428762, "learning_rate": 0.003, "loss": 4.2453, "step": 2142 }, { "epoch": 0.02143, "grad_norm": 0.7871322126205197, "learning_rate": 0.003, "loss": 4.2612, "step": 2143 }, { "epoch": 0.02144, "grad_norm": 0.8115888487824465, "learning_rate": 0.003, "loss": 4.2653, "step": 2144 }, { "epoch": 0.02145, "grad_norm": 0.7440847388519684, "learning_rate": 0.003, "loss": 4.2448, "step": 2145 }, { "epoch": 0.02146, "grad_norm": 0.6836186393980395, "learning_rate": 0.003, "loss": 4.2558, "step": 2146 }, { "epoch": 0.02147, "grad_norm": 0.6617245709810007, "learning_rate": 0.003, "loss": 4.2727, "step": 2147 }, { "epoch": 0.02148, "grad_norm": 0.7076496122050561, "learning_rate": 0.003, "loss": 4.2832, "step": 2148 }, { "epoch": 0.02149, "grad_norm": 0.729078859235357, "learning_rate": 0.003, "loss": 4.2694, "step": 2149 }, { "epoch": 0.0215, "grad_norm": 0.6732791642286783, "learning_rate": 0.003, "loss": 4.2532, "step": 2150 }, { "epoch": 0.02151, "grad_norm": 0.6647883181229363, "learning_rate": 0.003, "loss": 4.2571, "step": 2151 }, { "epoch": 0.02152, "grad_norm": 0.539629911002914, "learning_rate": 0.003, "loss": 4.2298, "step": 2152 }, { "epoch": 0.02153, "grad_norm": 0.4979893391256132, "learning_rate": 0.003, "loss": 4.2372, "step": 2153 }, { "epoch": 0.02154, "grad_norm": 0.43644785385536694, "learning_rate": 0.003, "loss": 4.2266, "step": 2154 }, { "epoch": 0.02155, "grad_norm": 0.3633150618094754, "learning_rate": 0.003, "loss": 4.2532, "step": 2155 }, { "epoch": 0.02156, "grad_norm": 0.39513834073913734, "learning_rate": 0.003, "loss": 4.239, "step": 2156 }, { "epoch": 0.02157, "grad_norm": 0.3860719279707547, "learning_rate": 0.003, "loss": 4.2034, "step": 2157 }, { "epoch": 0.02158, "grad_norm": 0.4137178291066844, "learning_rate": 0.003, "loss": 4.2241, "step": 2158 }, { "epoch": 0.02159, "grad_norm": 0.5163329954248042, "learning_rate": 0.003, "loss": 4.2392, "step": 2159 }, { "epoch": 0.0216, "grad_norm": 0.6337653160295451, "learning_rate": 0.003, "loss": 4.2443, "step": 2160 }, { "epoch": 0.02161, "grad_norm": 0.7844529335341662, "learning_rate": 0.003, "loss": 4.2371, "step": 2161 }, { "epoch": 0.02162, "grad_norm": 0.8327279956908368, "learning_rate": 0.003, "loss": 4.2454, "step": 2162 }, { "epoch": 0.02163, "grad_norm": 0.6324138237584291, "learning_rate": 0.003, "loss": 4.2546, "step": 2163 }, { "epoch": 0.02164, "grad_norm": 0.4272642114449097, "learning_rate": 0.003, "loss": 4.2393, "step": 2164 }, { "epoch": 0.02165, "grad_norm": 0.5755156335143842, "learning_rate": 0.003, "loss": 4.2107, "step": 2165 }, { "epoch": 0.02166, "grad_norm": 0.5179351914703093, "learning_rate": 0.003, "loss": 4.2452, "step": 2166 }, { "epoch": 0.02167, "grad_norm": 0.47795702712318133, "learning_rate": 0.003, "loss": 4.2215, "step": 2167 }, { "epoch": 0.02168, "grad_norm": 0.39306911706804365, "learning_rate": 0.003, "loss": 4.2181, "step": 2168 }, { "epoch": 0.02169, "grad_norm": 0.43640956278445237, "learning_rate": 0.003, "loss": 4.2172, "step": 2169 }, { "epoch": 0.0217, "grad_norm": 0.44151372609355966, "learning_rate": 0.003, "loss": 4.2252, "step": 2170 }, { "epoch": 0.02171, "grad_norm": 0.4398868813684568, "learning_rate": 0.003, "loss": 4.238, "step": 2171 }, { "epoch": 0.02172, "grad_norm": 0.453127984813779, "learning_rate": 0.003, "loss": 4.2099, "step": 2172 }, { "epoch": 0.02173, "grad_norm": 0.4200137719224221, "learning_rate": 0.003, "loss": 4.2112, "step": 2173 }, { "epoch": 0.02174, "grad_norm": 0.40927864912838796, "learning_rate": 0.003, "loss": 4.2074, "step": 2174 }, { "epoch": 0.02175, "grad_norm": 0.37273652046629646, "learning_rate": 0.003, "loss": 4.2305, "step": 2175 }, { "epoch": 0.02176, "grad_norm": 0.3722266811459993, "learning_rate": 0.003, "loss": 4.2097, "step": 2176 }, { "epoch": 0.02177, "grad_norm": 0.43921830418385466, "learning_rate": 0.003, "loss": 4.2187, "step": 2177 }, { "epoch": 0.02178, "grad_norm": 0.5214363509579703, "learning_rate": 0.003, "loss": 4.1985, "step": 2178 }, { "epoch": 0.02179, "grad_norm": 0.8402711445332305, "learning_rate": 0.003, "loss": 4.2235, "step": 2179 }, { "epoch": 0.0218, "grad_norm": 1.247371610474746, "learning_rate": 0.003, "loss": 4.2635, "step": 2180 }, { "epoch": 0.02181, "grad_norm": 0.8116435611354782, "learning_rate": 0.003, "loss": 4.2484, "step": 2181 }, { "epoch": 0.02182, "grad_norm": 0.6837903180839037, "learning_rate": 0.003, "loss": 4.2355, "step": 2182 }, { "epoch": 0.02183, "grad_norm": 0.7005151877778275, "learning_rate": 0.003, "loss": 4.2476, "step": 2183 }, { "epoch": 0.02184, "grad_norm": 0.8236745566528523, "learning_rate": 0.003, "loss": 4.2265, "step": 2184 }, { "epoch": 0.02185, "grad_norm": 0.91418771570573, "learning_rate": 0.003, "loss": 4.2327, "step": 2185 }, { "epoch": 0.02186, "grad_norm": 0.8323760045804593, "learning_rate": 0.003, "loss": 4.2482, "step": 2186 }, { "epoch": 0.02187, "grad_norm": 0.8176331489387878, "learning_rate": 0.003, "loss": 4.245, "step": 2187 }, { "epoch": 0.02188, "grad_norm": 0.8776011343747697, "learning_rate": 0.003, "loss": 4.2325, "step": 2188 }, { "epoch": 0.02189, "grad_norm": 0.8972484956693035, "learning_rate": 0.003, "loss": 4.2667, "step": 2189 }, { "epoch": 0.0219, "grad_norm": 0.8431917439790871, "learning_rate": 0.003, "loss": 4.2584, "step": 2190 }, { "epoch": 0.02191, "grad_norm": 0.7416940909972495, "learning_rate": 0.003, "loss": 4.2439, "step": 2191 }, { "epoch": 0.02192, "grad_norm": 0.6213980069901923, "learning_rate": 0.003, "loss": 4.2676, "step": 2192 }, { "epoch": 0.02193, "grad_norm": 0.6597970415430513, "learning_rate": 0.003, "loss": 4.2343, "step": 2193 }, { "epoch": 0.02194, "grad_norm": 0.7061865144419822, "learning_rate": 0.003, "loss": 4.2832, "step": 2194 }, { "epoch": 0.02195, "grad_norm": 0.7006382568298751, "learning_rate": 0.003, "loss": 4.251, "step": 2195 }, { "epoch": 0.02196, "grad_norm": 0.6722854605002367, "learning_rate": 0.003, "loss": 4.2565, "step": 2196 }, { "epoch": 0.02197, "grad_norm": 0.6383221093399062, "learning_rate": 0.003, "loss": 4.2472, "step": 2197 }, { "epoch": 0.02198, "grad_norm": 0.6598256711875262, "learning_rate": 0.003, "loss": 4.2361, "step": 2198 }, { "epoch": 0.02199, "grad_norm": 0.6310822502739497, "learning_rate": 0.003, "loss": 4.2038, "step": 2199 }, { "epoch": 0.022, "grad_norm": 0.5807092706295028, "learning_rate": 0.003, "loss": 4.2307, "step": 2200 }, { "epoch": 0.02201, "grad_norm": 0.5164997329736423, "learning_rate": 0.003, "loss": 4.2429, "step": 2201 }, { "epoch": 0.02202, "grad_norm": 0.5150399093991901, "learning_rate": 0.003, "loss": 4.2262, "step": 2202 }, { "epoch": 0.02203, "grad_norm": 0.5301942341385608, "learning_rate": 0.003, "loss": 4.2546, "step": 2203 }, { "epoch": 0.02204, "grad_norm": 0.5633853956311704, "learning_rate": 0.003, "loss": 4.2484, "step": 2204 }, { "epoch": 0.02205, "grad_norm": 0.5924154634458846, "learning_rate": 0.003, "loss": 4.2516, "step": 2205 }, { "epoch": 0.02206, "grad_norm": 0.6443822475335407, "learning_rate": 0.003, "loss": 4.2165, "step": 2206 }, { "epoch": 0.02207, "grad_norm": 0.6602238711518633, "learning_rate": 0.003, "loss": 4.2293, "step": 2207 }, { "epoch": 0.02208, "grad_norm": 0.6586527270486826, "learning_rate": 0.003, "loss": 4.2124, "step": 2208 }, { "epoch": 0.02209, "grad_norm": 0.5991461642461526, "learning_rate": 0.003, "loss": 4.2276, "step": 2209 }, { "epoch": 0.0221, "grad_norm": 0.5268358370568649, "learning_rate": 0.003, "loss": 4.2071, "step": 2210 }, { "epoch": 0.02211, "grad_norm": 0.6149216818507237, "learning_rate": 0.003, "loss": 4.2195, "step": 2211 }, { "epoch": 0.02212, "grad_norm": 0.7463716016406938, "learning_rate": 0.003, "loss": 4.2226, "step": 2212 }, { "epoch": 0.02213, "grad_norm": 0.7963090983602238, "learning_rate": 0.003, "loss": 4.2341, "step": 2213 }, { "epoch": 0.02214, "grad_norm": 0.7746780288539415, "learning_rate": 0.003, "loss": 4.2283, "step": 2214 }, { "epoch": 0.02215, "grad_norm": 0.7305597710999601, "learning_rate": 0.003, "loss": 4.2338, "step": 2215 }, { "epoch": 0.02216, "grad_norm": 0.6802860521431111, "learning_rate": 0.003, "loss": 4.2097, "step": 2216 }, { "epoch": 0.02217, "grad_norm": 0.814645356813129, "learning_rate": 0.003, "loss": 4.2149, "step": 2217 }, { "epoch": 0.02218, "grad_norm": 0.8644777161103991, "learning_rate": 0.003, "loss": 4.2502, "step": 2218 }, { "epoch": 0.02219, "grad_norm": 0.8895266477572761, "learning_rate": 0.003, "loss": 4.2436, "step": 2219 }, { "epoch": 0.0222, "grad_norm": 0.8379487765766322, "learning_rate": 0.003, "loss": 4.241, "step": 2220 }, { "epoch": 0.02221, "grad_norm": 0.8832652718422841, "learning_rate": 0.003, "loss": 4.23, "step": 2221 }, { "epoch": 0.02222, "grad_norm": 0.7112453634526715, "learning_rate": 0.003, "loss": 4.2245, "step": 2222 }, { "epoch": 0.02223, "grad_norm": 0.5578145775957193, "learning_rate": 0.003, "loss": 4.2345, "step": 2223 }, { "epoch": 0.02224, "grad_norm": 0.5281644921526645, "learning_rate": 0.003, "loss": 4.216, "step": 2224 }, { "epoch": 0.02225, "grad_norm": 0.499018726060841, "learning_rate": 0.003, "loss": 4.216, "step": 2225 }, { "epoch": 0.02226, "grad_norm": 0.5772234482094156, "learning_rate": 0.003, "loss": 4.2362, "step": 2226 }, { "epoch": 0.02227, "grad_norm": 0.6481086517118151, "learning_rate": 0.003, "loss": 4.2311, "step": 2227 }, { "epoch": 0.02228, "grad_norm": 0.669454673605761, "learning_rate": 0.003, "loss": 4.2187, "step": 2228 }, { "epoch": 0.02229, "grad_norm": 0.677093243217859, "learning_rate": 0.003, "loss": 4.1992, "step": 2229 }, { "epoch": 0.0223, "grad_norm": 0.6785810316864852, "learning_rate": 0.003, "loss": 4.2386, "step": 2230 }, { "epoch": 0.02231, "grad_norm": 0.697493374925287, "learning_rate": 0.003, "loss": 4.2447, "step": 2231 }, { "epoch": 0.02232, "grad_norm": 0.8916380613416046, "learning_rate": 0.003, "loss": 4.2575, "step": 2232 }, { "epoch": 0.02233, "grad_norm": 1.0887336160767505, "learning_rate": 0.003, "loss": 4.2371, "step": 2233 }, { "epoch": 0.02234, "grad_norm": 0.654547980354846, "learning_rate": 0.003, "loss": 4.2454, "step": 2234 }, { "epoch": 0.02235, "grad_norm": 0.5125878831412668, "learning_rate": 0.003, "loss": 4.2654, "step": 2235 }, { "epoch": 0.02236, "grad_norm": 0.7208588554620924, "learning_rate": 0.003, "loss": 4.2213, "step": 2236 }, { "epoch": 0.02237, "grad_norm": 0.7459329466933863, "learning_rate": 0.003, "loss": 4.2569, "step": 2237 }, { "epoch": 0.02238, "grad_norm": 0.7047944961318278, "learning_rate": 0.003, "loss": 4.2359, "step": 2238 }, { "epoch": 0.02239, "grad_norm": 0.5989118371963864, "learning_rate": 0.003, "loss": 4.2173, "step": 2239 }, { "epoch": 0.0224, "grad_norm": 0.4980068818057585, "learning_rate": 0.003, "loss": 4.1972, "step": 2240 }, { "epoch": 0.02241, "grad_norm": 0.4765997364648245, "learning_rate": 0.003, "loss": 4.2278, "step": 2241 }, { "epoch": 0.02242, "grad_norm": 0.46816719473424767, "learning_rate": 0.003, "loss": 4.2111, "step": 2242 }, { "epoch": 0.02243, "grad_norm": 0.4356611881103323, "learning_rate": 0.003, "loss": 4.2383, "step": 2243 }, { "epoch": 0.02244, "grad_norm": 0.5122363302058777, "learning_rate": 0.003, "loss": 4.2149, "step": 2244 }, { "epoch": 0.02245, "grad_norm": 0.48883381472091325, "learning_rate": 0.003, "loss": 4.2031, "step": 2245 }, { "epoch": 0.02246, "grad_norm": 0.46925512406079145, "learning_rate": 0.003, "loss": 4.2303, "step": 2246 }, { "epoch": 0.02247, "grad_norm": 0.4305805801856768, "learning_rate": 0.003, "loss": 4.2025, "step": 2247 }, { "epoch": 0.02248, "grad_norm": 0.49466299772622346, "learning_rate": 0.003, "loss": 4.2113, "step": 2248 }, { "epoch": 0.02249, "grad_norm": 0.5526228295993102, "learning_rate": 0.003, "loss": 4.204, "step": 2249 }, { "epoch": 0.0225, "grad_norm": 0.5428701465777451, "learning_rate": 0.003, "loss": 4.219, "step": 2250 }, { "epoch": 0.02251, "grad_norm": 0.5748138491222456, "learning_rate": 0.003, "loss": 4.242, "step": 2251 }, { "epoch": 0.02252, "grad_norm": 0.5627410118560184, "learning_rate": 0.003, "loss": 4.2308, "step": 2252 }, { "epoch": 0.02253, "grad_norm": 0.5464235773202091, "learning_rate": 0.003, "loss": 4.2107, "step": 2253 }, { "epoch": 0.02254, "grad_norm": 0.5460056774038026, "learning_rate": 0.003, "loss": 4.2195, "step": 2254 }, { "epoch": 0.02255, "grad_norm": 0.5905579639478153, "learning_rate": 0.003, "loss": 4.2038, "step": 2255 }, { "epoch": 0.02256, "grad_norm": 0.5996630986155933, "learning_rate": 0.003, "loss": 4.1955, "step": 2256 }, { "epoch": 0.02257, "grad_norm": 0.5786721112589904, "learning_rate": 0.003, "loss": 4.2091, "step": 2257 }, { "epoch": 0.02258, "grad_norm": 0.6117786281389065, "learning_rate": 0.003, "loss": 4.2219, "step": 2258 }, { "epoch": 0.02259, "grad_norm": 0.6324462778570742, "learning_rate": 0.003, "loss": 4.1884, "step": 2259 }, { "epoch": 0.0226, "grad_norm": 0.5884786495082438, "learning_rate": 0.003, "loss": 4.2409, "step": 2260 }, { "epoch": 0.02261, "grad_norm": 0.57968484048356, "learning_rate": 0.003, "loss": 4.2168, "step": 2261 }, { "epoch": 0.02262, "grad_norm": 0.6074577050747278, "learning_rate": 0.003, "loss": 4.2183, "step": 2262 }, { "epoch": 0.02263, "grad_norm": 0.7175518494233851, "learning_rate": 0.003, "loss": 4.237, "step": 2263 }, { "epoch": 0.02264, "grad_norm": 0.7379078827688124, "learning_rate": 0.003, "loss": 4.2337, "step": 2264 }, { "epoch": 0.02265, "grad_norm": 0.8347809834095073, "learning_rate": 0.003, "loss": 4.194, "step": 2265 }, { "epoch": 0.02266, "grad_norm": 0.9870112085605878, "learning_rate": 0.003, "loss": 4.2201, "step": 2266 }, { "epoch": 0.02267, "grad_norm": 1.1653841189257785, "learning_rate": 0.003, "loss": 4.2605, "step": 2267 }, { "epoch": 0.02268, "grad_norm": 0.7766778764734964, "learning_rate": 0.003, "loss": 4.2198, "step": 2268 }, { "epoch": 0.02269, "grad_norm": 0.7319600539673422, "learning_rate": 0.003, "loss": 4.2435, "step": 2269 }, { "epoch": 0.0227, "grad_norm": 0.8988269964618911, "learning_rate": 0.003, "loss": 4.2441, "step": 2270 }, { "epoch": 0.02271, "grad_norm": 1.0408382709602264, "learning_rate": 0.003, "loss": 4.2637, "step": 2271 }, { "epoch": 0.02272, "grad_norm": 1.0101614929632108, "learning_rate": 0.003, "loss": 4.2626, "step": 2272 }, { "epoch": 0.02273, "grad_norm": 0.8989352397704572, "learning_rate": 0.003, "loss": 4.2643, "step": 2273 }, { "epoch": 0.02274, "grad_norm": 0.68535867805162, "learning_rate": 0.003, "loss": 4.2412, "step": 2274 }, { "epoch": 0.02275, "grad_norm": 0.6683944403644445, "learning_rate": 0.003, "loss": 4.2325, "step": 2275 }, { "epoch": 0.02276, "grad_norm": 0.6494300645141144, "learning_rate": 0.003, "loss": 4.2263, "step": 2276 }, { "epoch": 0.02277, "grad_norm": 0.6973670182968851, "learning_rate": 0.003, "loss": 4.2382, "step": 2277 }, { "epoch": 0.02278, "grad_norm": 0.7902320444222205, "learning_rate": 0.003, "loss": 4.2463, "step": 2278 }, { "epoch": 0.02279, "grad_norm": 0.8146336114310585, "learning_rate": 0.003, "loss": 4.2598, "step": 2279 }, { "epoch": 0.0228, "grad_norm": 0.8796339021642547, "learning_rate": 0.003, "loss": 4.2507, "step": 2280 }, { "epoch": 0.02281, "grad_norm": 0.8184015106359132, "learning_rate": 0.003, "loss": 4.2484, "step": 2281 }, { "epoch": 0.02282, "grad_norm": 0.6892747359251186, "learning_rate": 0.003, "loss": 4.2524, "step": 2282 }, { "epoch": 0.02283, "grad_norm": 0.6565456722535347, "learning_rate": 0.003, "loss": 4.2659, "step": 2283 }, { "epoch": 0.02284, "grad_norm": 0.5726981092319336, "learning_rate": 0.003, "loss": 4.2571, "step": 2284 }, { "epoch": 0.02285, "grad_norm": 0.5302500755986935, "learning_rate": 0.003, "loss": 4.1961, "step": 2285 }, { "epoch": 0.02286, "grad_norm": 0.5726425860790162, "learning_rate": 0.003, "loss": 4.2575, "step": 2286 }, { "epoch": 0.02287, "grad_norm": 0.5919298156896359, "learning_rate": 0.003, "loss": 4.2367, "step": 2287 }, { "epoch": 0.02288, "grad_norm": 0.5989015507067659, "learning_rate": 0.003, "loss": 4.2599, "step": 2288 }, { "epoch": 0.02289, "grad_norm": 0.6577684390532245, "learning_rate": 0.003, "loss": 4.2586, "step": 2289 }, { "epoch": 0.0229, "grad_norm": 0.8096348684056334, "learning_rate": 0.003, "loss": 4.2424, "step": 2290 }, { "epoch": 0.02291, "grad_norm": 0.8395348686742803, "learning_rate": 0.003, "loss": 4.2207, "step": 2291 }, { "epoch": 0.02292, "grad_norm": 0.6824435433690341, "learning_rate": 0.003, "loss": 4.2133, "step": 2292 }, { "epoch": 0.02293, "grad_norm": 0.5182235922586542, "learning_rate": 0.003, "loss": 4.2116, "step": 2293 }, { "epoch": 0.02294, "grad_norm": 0.5893445423849215, "learning_rate": 0.003, "loss": 4.2139, "step": 2294 }, { "epoch": 0.02295, "grad_norm": 0.5909258163581438, "learning_rate": 0.003, "loss": 4.2083, "step": 2295 }, { "epoch": 0.02296, "grad_norm": 0.49380609648697416, "learning_rate": 0.003, "loss": 4.2381, "step": 2296 }, { "epoch": 0.02297, "grad_norm": 0.45509453612267814, "learning_rate": 0.003, "loss": 4.2063, "step": 2297 }, { "epoch": 0.02298, "grad_norm": 0.39795367258729464, "learning_rate": 0.003, "loss": 4.2154, "step": 2298 }, { "epoch": 0.02299, "grad_norm": 0.41864078809034155, "learning_rate": 0.003, "loss": 4.2206, "step": 2299 }, { "epoch": 0.023, "grad_norm": 0.45486028161368913, "learning_rate": 0.003, "loss": 4.2385, "step": 2300 }, { "epoch": 0.02301, "grad_norm": 0.4383928009279485, "learning_rate": 0.003, "loss": 4.2022, "step": 2301 }, { "epoch": 0.02302, "grad_norm": 0.4499222406460722, "learning_rate": 0.003, "loss": 4.2089, "step": 2302 }, { "epoch": 0.02303, "grad_norm": 0.4500683500939326, "learning_rate": 0.003, "loss": 4.1978, "step": 2303 }, { "epoch": 0.02304, "grad_norm": 0.4684736001381324, "learning_rate": 0.003, "loss": 4.2294, "step": 2304 }, { "epoch": 0.02305, "grad_norm": 0.4868306442044598, "learning_rate": 0.003, "loss": 4.2051, "step": 2305 }, { "epoch": 0.02306, "grad_norm": 0.5334764519084825, "learning_rate": 0.003, "loss": 4.1822, "step": 2306 }, { "epoch": 0.02307, "grad_norm": 0.6492467780995748, "learning_rate": 0.003, "loss": 4.2166, "step": 2307 }, { "epoch": 0.02308, "grad_norm": 0.7095262909438931, "learning_rate": 0.003, "loss": 4.2125, "step": 2308 }, { "epoch": 0.02309, "grad_norm": 0.54863336292105, "learning_rate": 0.003, "loss": 4.2349, "step": 2309 }, { "epoch": 0.0231, "grad_norm": 0.5340829751385735, "learning_rate": 0.003, "loss": 4.2136, "step": 2310 }, { "epoch": 0.02311, "grad_norm": 0.6450529599308674, "learning_rate": 0.003, "loss": 4.2222, "step": 2311 }, { "epoch": 0.02312, "grad_norm": 0.820530552930021, "learning_rate": 0.003, "loss": 4.2023, "step": 2312 }, { "epoch": 0.02313, "grad_norm": 0.8724275505071452, "learning_rate": 0.003, "loss": 4.2036, "step": 2313 }, { "epoch": 0.02314, "grad_norm": 0.7571797533207066, "learning_rate": 0.003, "loss": 4.2199, "step": 2314 }, { "epoch": 0.02315, "grad_norm": 0.6953439594266967, "learning_rate": 0.003, "loss": 4.221, "step": 2315 }, { "epoch": 0.02316, "grad_norm": 0.7680069713239076, "learning_rate": 0.003, "loss": 4.2379, "step": 2316 }, { "epoch": 0.02317, "grad_norm": 0.8161604949475788, "learning_rate": 0.003, "loss": 4.223, "step": 2317 }, { "epoch": 0.02318, "grad_norm": 0.7516223217475189, "learning_rate": 0.003, "loss": 4.2136, "step": 2318 }, { "epoch": 0.02319, "grad_norm": 0.7000350308630269, "learning_rate": 0.003, "loss": 4.2358, "step": 2319 }, { "epoch": 0.0232, "grad_norm": 0.7134745769645445, "learning_rate": 0.003, "loss": 4.2452, "step": 2320 }, { "epoch": 0.02321, "grad_norm": 0.771931890706088, "learning_rate": 0.003, "loss": 4.2279, "step": 2321 }, { "epoch": 0.02322, "grad_norm": 0.9854722178210035, "learning_rate": 0.003, "loss": 4.2409, "step": 2322 }, { "epoch": 0.02323, "grad_norm": 0.9767405875240949, "learning_rate": 0.003, "loss": 4.2246, "step": 2323 }, { "epoch": 0.02324, "grad_norm": 0.8048742564770486, "learning_rate": 0.003, "loss": 4.2207, "step": 2324 }, { "epoch": 0.02325, "grad_norm": 0.7747178630515028, "learning_rate": 0.003, "loss": 4.242, "step": 2325 }, { "epoch": 0.02326, "grad_norm": 0.7969057026631506, "learning_rate": 0.003, "loss": 4.2598, "step": 2326 }, { "epoch": 0.02327, "grad_norm": 0.8966295433578796, "learning_rate": 0.003, "loss": 4.2563, "step": 2327 }, { "epoch": 0.02328, "grad_norm": 0.7936452621630464, "learning_rate": 0.003, "loss": 4.2524, "step": 2328 }, { "epoch": 0.02329, "grad_norm": 0.6876802756045969, "learning_rate": 0.003, "loss": 4.2386, "step": 2329 }, { "epoch": 0.0233, "grad_norm": 0.7923806233690935, "learning_rate": 0.003, "loss": 4.2331, "step": 2330 }, { "epoch": 0.02331, "grad_norm": 0.8629660125432402, "learning_rate": 0.003, "loss": 4.278, "step": 2331 }, { "epoch": 0.02332, "grad_norm": 0.8224989442560882, "learning_rate": 0.003, "loss": 4.2382, "step": 2332 }, { "epoch": 0.02333, "grad_norm": 0.7543416736322919, "learning_rate": 0.003, "loss": 4.2433, "step": 2333 }, { "epoch": 0.02334, "grad_norm": 0.6971129018109938, "learning_rate": 0.003, "loss": 4.209, "step": 2334 }, { "epoch": 0.02335, "grad_norm": 0.7296913197019357, "learning_rate": 0.003, "loss": 4.2519, "step": 2335 }, { "epoch": 0.02336, "grad_norm": 0.7163909054433683, "learning_rate": 0.003, "loss": 4.2383, "step": 2336 }, { "epoch": 0.02337, "grad_norm": 0.6354188850134015, "learning_rate": 0.003, "loss": 4.2384, "step": 2337 }, { "epoch": 0.02338, "grad_norm": 0.7380334344563654, "learning_rate": 0.003, "loss": 4.2287, "step": 2338 }, { "epoch": 0.02339, "grad_norm": 0.6731414857527438, "learning_rate": 0.003, "loss": 4.2221, "step": 2339 }, { "epoch": 0.0234, "grad_norm": 0.5402269466528953, "learning_rate": 0.003, "loss": 4.2314, "step": 2340 }, { "epoch": 0.02341, "grad_norm": 0.442581637819893, "learning_rate": 0.003, "loss": 4.2031, "step": 2341 }, { "epoch": 0.02342, "grad_norm": 0.37485389657180546, "learning_rate": 0.003, "loss": 4.2138, "step": 2342 }, { "epoch": 0.02343, "grad_norm": 0.3841947544396341, "learning_rate": 0.003, "loss": 4.213, "step": 2343 }, { "epoch": 0.02344, "grad_norm": 0.37024132956414024, "learning_rate": 0.003, "loss": 4.2104, "step": 2344 }, { "epoch": 0.02345, "grad_norm": 0.3532869549675326, "learning_rate": 0.003, "loss": 4.2035, "step": 2345 }, { "epoch": 0.02346, "grad_norm": 0.3470711196570803, "learning_rate": 0.003, "loss": 4.1998, "step": 2346 }, { "epoch": 0.02347, "grad_norm": 0.4249636779242577, "learning_rate": 0.003, "loss": 4.2374, "step": 2347 }, { "epoch": 0.02348, "grad_norm": 0.5336097138298947, "learning_rate": 0.003, "loss": 4.2032, "step": 2348 }, { "epoch": 0.02349, "grad_norm": 0.7560598476745078, "learning_rate": 0.003, "loss": 4.2252, "step": 2349 }, { "epoch": 0.0235, "grad_norm": 0.9366290665402001, "learning_rate": 0.003, "loss": 4.2316, "step": 2350 }, { "epoch": 0.02351, "grad_norm": 0.8241230319323669, "learning_rate": 0.003, "loss": 4.2392, "step": 2351 }, { "epoch": 0.02352, "grad_norm": 0.4838312119944649, "learning_rate": 0.003, "loss": 4.1838, "step": 2352 }, { "epoch": 0.02353, "grad_norm": 0.5962852237534145, "learning_rate": 0.003, "loss": 4.2218, "step": 2353 }, { "epoch": 0.02354, "grad_norm": 0.6656896770625591, "learning_rate": 0.003, "loss": 4.2262, "step": 2354 }, { "epoch": 0.02355, "grad_norm": 0.5663365054891429, "learning_rate": 0.003, "loss": 4.2216, "step": 2355 }, { "epoch": 0.02356, "grad_norm": 0.4266142835453117, "learning_rate": 0.003, "loss": 4.1971, "step": 2356 }, { "epoch": 0.02357, "grad_norm": 0.4433400684484211, "learning_rate": 0.003, "loss": 4.1998, "step": 2357 }, { "epoch": 0.02358, "grad_norm": 0.5052976773226068, "learning_rate": 0.003, "loss": 4.2226, "step": 2358 }, { "epoch": 0.02359, "grad_norm": 0.45316233898993824, "learning_rate": 0.003, "loss": 4.1954, "step": 2359 }, { "epoch": 0.0236, "grad_norm": 0.4014671488900521, "learning_rate": 0.003, "loss": 4.2325, "step": 2360 }, { "epoch": 0.02361, "grad_norm": 0.36698656753000536, "learning_rate": 0.003, "loss": 4.212, "step": 2361 }, { "epoch": 0.02362, "grad_norm": 0.42314800926533436, "learning_rate": 0.003, "loss": 4.2053, "step": 2362 }, { "epoch": 0.02363, "grad_norm": 0.4217103245805847, "learning_rate": 0.003, "loss": 4.1792, "step": 2363 }, { "epoch": 0.02364, "grad_norm": 0.4210664479788169, "learning_rate": 0.003, "loss": 4.2013, "step": 2364 }, { "epoch": 0.02365, "grad_norm": 0.4933666084308532, "learning_rate": 0.003, "loss": 4.2208, "step": 2365 }, { "epoch": 0.02366, "grad_norm": 0.5419476181345154, "learning_rate": 0.003, "loss": 4.1974, "step": 2366 }, { "epoch": 0.02367, "grad_norm": 0.5315884871641553, "learning_rate": 0.003, "loss": 4.2086, "step": 2367 }, { "epoch": 0.02368, "grad_norm": 0.5513031505324636, "learning_rate": 0.003, "loss": 4.1821, "step": 2368 }, { "epoch": 0.02369, "grad_norm": 0.6237648160785263, "learning_rate": 0.003, "loss": 4.1978, "step": 2369 }, { "epoch": 0.0237, "grad_norm": 0.7305017234045693, "learning_rate": 0.003, "loss": 4.2311, "step": 2370 }, { "epoch": 0.02371, "grad_norm": 0.7544509646548179, "learning_rate": 0.003, "loss": 4.2181, "step": 2371 }, { "epoch": 0.02372, "grad_norm": 0.8927671954437898, "learning_rate": 0.003, "loss": 4.2125, "step": 2372 }, { "epoch": 0.02373, "grad_norm": 0.8832047313566855, "learning_rate": 0.003, "loss": 4.2061, "step": 2373 }, { "epoch": 0.02374, "grad_norm": 0.9298955170309446, "learning_rate": 0.003, "loss": 4.2122, "step": 2374 }, { "epoch": 0.02375, "grad_norm": 0.8640645189121525, "learning_rate": 0.003, "loss": 4.2188, "step": 2375 }, { "epoch": 0.02376, "grad_norm": 0.8134531891441159, "learning_rate": 0.003, "loss": 4.2414, "step": 2376 }, { "epoch": 0.02377, "grad_norm": 0.8309736476293886, "learning_rate": 0.003, "loss": 4.2122, "step": 2377 }, { "epoch": 0.02378, "grad_norm": 0.8951590746635169, "learning_rate": 0.003, "loss": 4.2274, "step": 2378 }, { "epoch": 0.02379, "grad_norm": 0.8950538453176826, "learning_rate": 0.003, "loss": 4.2181, "step": 2379 }, { "epoch": 0.0238, "grad_norm": 0.8724035600141309, "learning_rate": 0.003, "loss": 4.1913, "step": 2380 }, { "epoch": 0.02381, "grad_norm": 0.7988879477893653, "learning_rate": 0.003, "loss": 4.2027, "step": 2381 }, { "epoch": 0.02382, "grad_norm": 0.7723567017862097, "learning_rate": 0.003, "loss": 4.2578, "step": 2382 }, { "epoch": 0.02383, "grad_norm": 0.671665890468045, "learning_rate": 0.003, "loss": 4.2171, "step": 2383 }, { "epoch": 0.02384, "grad_norm": 0.6389101019379912, "learning_rate": 0.003, "loss": 4.2056, "step": 2384 }, { "epoch": 0.02385, "grad_norm": 0.580627279569301, "learning_rate": 0.003, "loss": 4.2294, "step": 2385 }, { "epoch": 0.02386, "grad_norm": 0.6112201616439135, "learning_rate": 0.003, "loss": 4.2052, "step": 2386 }, { "epoch": 0.02387, "grad_norm": 0.609528097348241, "learning_rate": 0.003, "loss": 4.2032, "step": 2387 }, { "epoch": 0.02388, "grad_norm": 0.6686638362844006, "learning_rate": 0.003, "loss": 4.2564, "step": 2388 }, { "epoch": 0.02389, "grad_norm": 0.740840757442375, "learning_rate": 0.003, "loss": 4.2206, "step": 2389 }, { "epoch": 0.0239, "grad_norm": 0.795817868194079, "learning_rate": 0.003, "loss": 4.2173, "step": 2390 }, { "epoch": 0.02391, "grad_norm": 0.6885254297129437, "learning_rate": 0.003, "loss": 4.2366, "step": 2391 }, { "epoch": 0.02392, "grad_norm": 0.57208190101736, "learning_rate": 0.003, "loss": 4.2339, "step": 2392 }, { "epoch": 0.02393, "grad_norm": 0.6766423034488698, "learning_rate": 0.003, "loss": 4.2001, "step": 2393 }, { "epoch": 0.02394, "grad_norm": 0.7708917314903707, "learning_rate": 0.003, "loss": 4.219, "step": 2394 }, { "epoch": 0.02395, "grad_norm": 0.8419139355655553, "learning_rate": 0.003, "loss": 4.234, "step": 2395 }, { "epoch": 0.02396, "grad_norm": 0.7491203962086143, "learning_rate": 0.003, "loss": 4.2487, "step": 2396 }, { "epoch": 0.02397, "grad_norm": 0.6046499284946664, "learning_rate": 0.003, "loss": 4.2249, "step": 2397 }, { "epoch": 0.02398, "grad_norm": 0.654682549696845, "learning_rate": 0.003, "loss": 4.2309, "step": 2398 }, { "epoch": 0.02399, "grad_norm": 0.600390647492162, "learning_rate": 0.003, "loss": 4.209, "step": 2399 }, { "epoch": 0.024, "grad_norm": 0.5155181636394677, "learning_rate": 0.003, "loss": 4.2198, "step": 2400 }, { "epoch": 0.02401, "grad_norm": 0.48968723675185105, "learning_rate": 0.003, "loss": 4.1921, "step": 2401 }, { "epoch": 0.02402, "grad_norm": 0.5172416496163427, "learning_rate": 0.003, "loss": 4.2245, "step": 2402 }, { "epoch": 0.02403, "grad_norm": 0.5647625943348626, "learning_rate": 0.003, "loss": 4.1949, "step": 2403 }, { "epoch": 0.02404, "grad_norm": 0.5032714506791921, "learning_rate": 0.003, "loss": 4.233, "step": 2404 }, { "epoch": 0.02405, "grad_norm": 0.4675987670630769, "learning_rate": 0.003, "loss": 4.2334, "step": 2405 }, { "epoch": 0.02406, "grad_norm": 0.5178645102430114, "learning_rate": 0.003, "loss": 4.2192, "step": 2406 }, { "epoch": 0.02407, "grad_norm": 0.5559303502201767, "learning_rate": 0.003, "loss": 4.1976, "step": 2407 }, { "epoch": 0.02408, "grad_norm": 0.551443134128045, "learning_rate": 0.003, "loss": 4.2064, "step": 2408 }, { "epoch": 0.02409, "grad_norm": 0.5057926630793051, "learning_rate": 0.003, "loss": 4.2079, "step": 2409 }, { "epoch": 0.0241, "grad_norm": 0.6021866868934532, "learning_rate": 0.003, "loss": 4.2249, "step": 2410 }, { "epoch": 0.02411, "grad_norm": 0.7613042549642509, "learning_rate": 0.003, "loss": 4.1979, "step": 2411 }, { "epoch": 0.02412, "grad_norm": 0.7823591519195993, "learning_rate": 0.003, "loss": 4.2054, "step": 2412 }, { "epoch": 0.02413, "grad_norm": 0.6762814430739601, "learning_rate": 0.003, "loss": 4.2005, "step": 2413 }, { "epoch": 0.02414, "grad_norm": 0.6515291647943621, "learning_rate": 0.003, "loss": 4.2148, "step": 2414 }, { "epoch": 0.02415, "grad_norm": 0.5905595191694923, "learning_rate": 0.003, "loss": 4.2125, "step": 2415 }, { "epoch": 0.02416, "grad_norm": 0.5267459284750431, "learning_rate": 0.003, "loss": 4.1927, "step": 2416 }, { "epoch": 0.02417, "grad_norm": 0.6612447088265828, "learning_rate": 0.003, "loss": 4.2248, "step": 2417 }, { "epoch": 0.02418, "grad_norm": 0.7551414883340356, "learning_rate": 0.003, "loss": 4.2234, "step": 2418 }, { "epoch": 0.02419, "grad_norm": 0.8205697648408953, "learning_rate": 0.003, "loss": 4.2244, "step": 2419 }, { "epoch": 0.0242, "grad_norm": 0.8899430556748202, "learning_rate": 0.003, "loss": 4.2436, "step": 2420 }, { "epoch": 0.02421, "grad_norm": 0.7830186936394068, "learning_rate": 0.003, "loss": 4.2007, "step": 2421 }, { "epoch": 0.02422, "grad_norm": 0.707149933198692, "learning_rate": 0.003, "loss": 4.2528, "step": 2422 }, { "epoch": 0.02423, "grad_norm": 0.6635737571843986, "learning_rate": 0.003, "loss": 4.2424, "step": 2423 }, { "epoch": 0.02424, "grad_norm": 0.6518995770283964, "learning_rate": 0.003, "loss": 4.2374, "step": 2424 }, { "epoch": 0.02425, "grad_norm": 0.6795937334698865, "learning_rate": 0.003, "loss": 4.2233, "step": 2425 }, { "epoch": 0.02426, "grad_norm": 0.7581646116983769, "learning_rate": 0.003, "loss": 4.2315, "step": 2426 }, { "epoch": 0.02427, "grad_norm": 0.9154720690249456, "learning_rate": 0.003, "loss": 4.2647, "step": 2427 }, { "epoch": 0.02428, "grad_norm": 0.9816907611239863, "learning_rate": 0.003, "loss": 4.2433, "step": 2428 }, { "epoch": 0.02429, "grad_norm": 0.9289957548466957, "learning_rate": 0.003, "loss": 4.2347, "step": 2429 }, { "epoch": 0.0243, "grad_norm": 0.6787755121793266, "learning_rate": 0.003, "loss": 4.2464, "step": 2430 }, { "epoch": 0.02431, "grad_norm": 0.7835751457530344, "learning_rate": 0.003, "loss": 4.2265, "step": 2431 }, { "epoch": 0.02432, "grad_norm": 0.7857412874952409, "learning_rate": 0.003, "loss": 4.2512, "step": 2432 }, { "epoch": 0.02433, "grad_norm": 0.7902925766285801, "learning_rate": 0.003, "loss": 4.263, "step": 2433 }, { "epoch": 0.02434, "grad_norm": 0.6888559592034768, "learning_rate": 0.003, "loss": 4.2265, "step": 2434 }, { "epoch": 0.02435, "grad_norm": 0.7021573732138202, "learning_rate": 0.003, "loss": 4.2272, "step": 2435 }, { "epoch": 0.02436, "grad_norm": 0.7401895262850188, "learning_rate": 0.003, "loss": 4.235, "step": 2436 }, { "epoch": 0.02437, "grad_norm": 0.6162969571454292, "learning_rate": 0.003, "loss": 4.2159, "step": 2437 }, { "epoch": 0.02438, "grad_norm": 0.5086196755044915, "learning_rate": 0.003, "loss": 4.2268, "step": 2438 }, { "epoch": 0.02439, "grad_norm": 0.47970313870881265, "learning_rate": 0.003, "loss": 4.1957, "step": 2439 }, { "epoch": 0.0244, "grad_norm": 0.4288371919581958, "learning_rate": 0.003, "loss": 4.2421, "step": 2440 }, { "epoch": 0.02441, "grad_norm": 0.38882426885049926, "learning_rate": 0.003, "loss": 4.2052, "step": 2441 }, { "epoch": 0.02442, "grad_norm": 0.4241944568677858, "learning_rate": 0.003, "loss": 4.2386, "step": 2442 }, { "epoch": 0.02443, "grad_norm": 0.4820755060057781, "learning_rate": 0.003, "loss": 4.1743, "step": 2443 }, { "epoch": 0.02444, "grad_norm": 0.620820199570667, "learning_rate": 0.003, "loss": 4.2049, "step": 2444 }, { "epoch": 0.02445, "grad_norm": 0.8695985731954037, "learning_rate": 0.003, "loss": 4.2178, "step": 2445 }, { "epoch": 0.02446, "grad_norm": 0.93656467702838, "learning_rate": 0.003, "loss": 4.2301, "step": 2446 }, { "epoch": 0.02447, "grad_norm": 0.7193216075423335, "learning_rate": 0.003, "loss": 4.2297, "step": 2447 }, { "epoch": 0.02448, "grad_norm": 0.77368716084308, "learning_rate": 0.003, "loss": 4.2355, "step": 2448 }, { "epoch": 0.02449, "grad_norm": 0.93482214454163, "learning_rate": 0.003, "loss": 4.2378, "step": 2449 }, { "epoch": 0.0245, "grad_norm": 0.7890224806153067, "learning_rate": 0.003, "loss": 4.2333, "step": 2450 }, { "epoch": 0.02451, "grad_norm": 0.6715615912076842, "learning_rate": 0.003, "loss": 4.2085, "step": 2451 }, { "epoch": 0.02452, "grad_norm": 0.6401183815147788, "learning_rate": 0.003, "loss": 4.2138, "step": 2452 }, { "epoch": 0.02453, "grad_norm": 0.5995798001531515, "learning_rate": 0.003, "loss": 4.1908, "step": 2453 }, { "epoch": 0.02454, "grad_norm": 0.5267826981306184, "learning_rate": 0.003, "loss": 4.1933, "step": 2454 }, { "epoch": 0.02455, "grad_norm": 0.4821819525696827, "learning_rate": 0.003, "loss": 4.2032, "step": 2455 }, { "epoch": 0.02456, "grad_norm": 0.5089117196246842, "learning_rate": 0.003, "loss": 4.1693, "step": 2456 }, { "epoch": 0.02457, "grad_norm": 0.5510451022091881, "learning_rate": 0.003, "loss": 4.2212, "step": 2457 }, { "epoch": 0.02458, "grad_norm": 0.48930227370319684, "learning_rate": 0.003, "loss": 4.1938, "step": 2458 }, { "epoch": 0.02459, "grad_norm": 0.4331220174779214, "learning_rate": 0.003, "loss": 4.2193, "step": 2459 }, { "epoch": 0.0246, "grad_norm": 0.41158812351788215, "learning_rate": 0.003, "loss": 4.2215, "step": 2460 }, { "epoch": 0.02461, "grad_norm": 0.39108806608008795, "learning_rate": 0.003, "loss": 4.1964, "step": 2461 }, { "epoch": 0.02462, "grad_norm": 0.3754314162918144, "learning_rate": 0.003, "loss": 4.2216, "step": 2462 }, { "epoch": 0.02463, "grad_norm": 0.4122698662760153, "learning_rate": 0.003, "loss": 4.2201, "step": 2463 }, { "epoch": 0.02464, "grad_norm": 0.4350495849540257, "learning_rate": 0.003, "loss": 4.1992, "step": 2464 }, { "epoch": 0.02465, "grad_norm": 0.49445669847439655, "learning_rate": 0.003, "loss": 4.2175, "step": 2465 }, { "epoch": 0.02466, "grad_norm": 0.5818409584444987, "learning_rate": 0.003, "loss": 4.1996, "step": 2466 }, { "epoch": 0.02467, "grad_norm": 0.6235372640193676, "learning_rate": 0.003, "loss": 4.1803, "step": 2467 }, { "epoch": 0.02468, "grad_norm": 0.5927407776473445, "learning_rate": 0.003, "loss": 4.223, "step": 2468 }, { "epoch": 0.02469, "grad_norm": 0.6223619407097589, "learning_rate": 0.003, "loss": 4.2028, "step": 2469 }, { "epoch": 0.0247, "grad_norm": 0.583979309128103, "learning_rate": 0.003, "loss": 4.2, "step": 2470 }, { "epoch": 0.02471, "grad_norm": 0.5491918199641306, "learning_rate": 0.003, "loss": 4.1836, "step": 2471 }, { "epoch": 0.02472, "grad_norm": 0.635376629969627, "learning_rate": 0.003, "loss": 4.1909, "step": 2472 }, { "epoch": 0.02473, "grad_norm": 0.9426912757457121, "learning_rate": 0.003, "loss": 4.1862, "step": 2473 }, { "epoch": 0.02474, "grad_norm": 1.1494984995182613, "learning_rate": 0.003, "loss": 4.2384, "step": 2474 }, { "epoch": 0.02475, "grad_norm": 0.6832524660748021, "learning_rate": 0.003, "loss": 4.1958, "step": 2475 }, { "epoch": 0.02476, "grad_norm": 0.6659288683302277, "learning_rate": 0.003, "loss": 4.2064, "step": 2476 }, { "epoch": 0.02477, "grad_norm": 0.7078551330691552, "learning_rate": 0.003, "loss": 4.229, "step": 2477 }, { "epoch": 0.02478, "grad_norm": 0.682879510763541, "learning_rate": 0.003, "loss": 4.2039, "step": 2478 }, { "epoch": 0.02479, "grad_norm": 0.6453701850218885, "learning_rate": 0.003, "loss": 4.1922, "step": 2479 }, { "epoch": 0.0248, "grad_norm": 0.6214835257281583, "learning_rate": 0.003, "loss": 4.2323, "step": 2480 }, { "epoch": 0.02481, "grad_norm": 0.5814705370549468, "learning_rate": 0.003, "loss": 4.2104, "step": 2481 }, { "epoch": 0.02482, "grad_norm": 0.5717194029886628, "learning_rate": 0.003, "loss": 4.2206, "step": 2482 }, { "epoch": 0.02483, "grad_norm": 0.5507096505833119, "learning_rate": 0.003, "loss": 4.1741, "step": 2483 }, { "epoch": 0.02484, "grad_norm": 0.48818940634137703, "learning_rate": 0.003, "loss": 4.2, "step": 2484 }, { "epoch": 0.02485, "grad_norm": 0.5078768442645458, "learning_rate": 0.003, "loss": 4.2218, "step": 2485 }, { "epoch": 0.02486, "grad_norm": 0.6158842388358957, "learning_rate": 0.003, "loss": 4.2483, "step": 2486 }, { "epoch": 0.02487, "grad_norm": 0.7113530958465855, "learning_rate": 0.003, "loss": 4.2057, "step": 2487 }, { "epoch": 0.02488, "grad_norm": 0.8543927391668017, "learning_rate": 0.003, "loss": 4.1886, "step": 2488 }, { "epoch": 0.02489, "grad_norm": 0.9446890974964458, "learning_rate": 0.003, "loss": 4.1911, "step": 2489 }, { "epoch": 0.0249, "grad_norm": 0.7778085412828454, "learning_rate": 0.003, "loss": 4.1871, "step": 2490 }, { "epoch": 0.02491, "grad_norm": 0.6481694251808287, "learning_rate": 0.003, "loss": 4.2111, "step": 2491 }, { "epoch": 0.02492, "grad_norm": 0.6221254984926949, "learning_rate": 0.003, "loss": 4.2219, "step": 2492 }, { "epoch": 0.02493, "grad_norm": 0.6659856187784612, "learning_rate": 0.003, "loss": 4.1927, "step": 2493 }, { "epoch": 0.02494, "grad_norm": 0.6928875164931092, "learning_rate": 0.003, "loss": 4.206, "step": 2494 }, { "epoch": 0.02495, "grad_norm": 0.6666259038056425, "learning_rate": 0.003, "loss": 4.2171, "step": 2495 }, { "epoch": 0.02496, "grad_norm": 0.6388189581951018, "learning_rate": 0.003, "loss": 4.2077, "step": 2496 }, { "epoch": 0.02497, "grad_norm": 0.6011544202060304, "learning_rate": 0.003, "loss": 4.2109, "step": 2497 }, { "epoch": 0.02498, "grad_norm": 0.6445306947945534, "learning_rate": 0.003, "loss": 4.1989, "step": 2498 }, { "epoch": 0.02499, "grad_norm": 0.6336412786289282, "learning_rate": 0.003, "loss": 4.2101, "step": 2499 }, { "epoch": 0.025, "grad_norm": 0.7751683320425504, "learning_rate": 0.003, "loss": 4.1983, "step": 2500 }, { "epoch": 0.02501, "grad_norm": 0.8226642072050464, "learning_rate": 0.003, "loss": 4.2348, "step": 2501 }, { "epoch": 0.02502, "grad_norm": 0.7525954653781657, "learning_rate": 0.003, "loss": 4.2151, "step": 2502 }, { "epoch": 0.02503, "grad_norm": 0.6443708527662554, "learning_rate": 0.003, "loss": 4.22, "step": 2503 }, { "epoch": 0.02504, "grad_norm": 0.7431879561442126, "learning_rate": 0.003, "loss": 4.2035, "step": 2504 }, { "epoch": 0.02505, "grad_norm": 0.8444858068379042, "learning_rate": 0.003, "loss": 4.206, "step": 2505 }, { "epoch": 0.02506, "grad_norm": 0.8357688056745337, "learning_rate": 0.003, "loss": 4.2242, "step": 2506 }, { "epoch": 0.02507, "grad_norm": 0.8015204150666452, "learning_rate": 0.003, "loss": 4.21, "step": 2507 }, { "epoch": 0.02508, "grad_norm": 0.7988221880335916, "learning_rate": 0.003, "loss": 4.2096, "step": 2508 }, { "epoch": 0.02509, "grad_norm": 0.8648254923532768, "learning_rate": 0.003, "loss": 4.2197, "step": 2509 }, { "epoch": 0.0251, "grad_norm": 0.9052649096369385, "learning_rate": 0.003, "loss": 4.2352, "step": 2510 }, { "epoch": 0.02511, "grad_norm": 0.8287055915677601, "learning_rate": 0.003, "loss": 4.243, "step": 2511 }, { "epoch": 0.02512, "grad_norm": 0.6322731338734815, "learning_rate": 0.003, "loss": 4.2191, "step": 2512 }, { "epoch": 0.02513, "grad_norm": 0.6027563249296022, "learning_rate": 0.003, "loss": 4.2132, "step": 2513 }, { "epoch": 0.02514, "grad_norm": 0.5639092637703635, "learning_rate": 0.003, "loss": 4.1993, "step": 2514 }, { "epoch": 0.02515, "grad_norm": 0.5109276454863091, "learning_rate": 0.003, "loss": 4.1846, "step": 2515 }, { "epoch": 0.02516, "grad_norm": 0.49613462202561587, "learning_rate": 0.003, "loss": 4.2057, "step": 2516 }, { "epoch": 0.02517, "grad_norm": 0.4638933308319014, "learning_rate": 0.003, "loss": 4.2043, "step": 2517 }, { "epoch": 0.02518, "grad_norm": 0.47895707517955144, "learning_rate": 0.003, "loss": 4.2156, "step": 2518 }, { "epoch": 0.02519, "grad_norm": 0.540563379395588, "learning_rate": 0.003, "loss": 4.191, "step": 2519 }, { "epoch": 0.0252, "grad_norm": 0.593588962667406, "learning_rate": 0.003, "loss": 4.2039, "step": 2520 }, { "epoch": 0.02521, "grad_norm": 0.5859708147834043, "learning_rate": 0.003, "loss": 4.2007, "step": 2521 }, { "epoch": 0.02522, "grad_norm": 0.6750984334319393, "learning_rate": 0.003, "loss": 4.1967, "step": 2522 }, { "epoch": 0.02523, "grad_norm": 0.8294919931793535, "learning_rate": 0.003, "loss": 4.2041, "step": 2523 }, { "epoch": 0.02524, "grad_norm": 0.8897442405025344, "learning_rate": 0.003, "loss": 4.2231, "step": 2524 }, { "epoch": 0.02525, "grad_norm": 0.8626054901448138, "learning_rate": 0.003, "loss": 4.2361, "step": 2525 }, { "epoch": 0.02526, "grad_norm": 0.6609622839443053, "learning_rate": 0.003, "loss": 4.1771, "step": 2526 }, { "epoch": 0.02527, "grad_norm": 0.5788832246003424, "learning_rate": 0.003, "loss": 4.2082, "step": 2527 }, { "epoch": 0.02528, "grad_norm": 0.6673373775723666, "learning_rate": 0.003, "loss": 4.2334, "step": 2528 }, { "epoch": 0.02529, "grad_norm": 0.6949939363717212, "learning_rate": 0.003, "loss": 4.2031, "step": 2529 }, { "epoch": 0.0253, "grad_norm": 0.6926157434556843, "learning_rate": 0.003, "loss": 4.2244, "step": 2530 }, { "epoch": 0.02531, "grad_norm": 0.6799840691277568, "learning_rate": 0.003, "loss": 4.2264, "step": 2531 }, { "epoch": 0.02532, "grad_norm": 0.8293024556709889, "learning_rate": 0.003, "loss": 4.1997, "step": 2532 }, { "epoch": 0.02533, "grad_norm": 0.9626324272072302, "learning_rate": 0.003, "loss": 4.2444, "step": 2533 }, { "epoch": 0.02534, "grad_norm": 0.9642180813694794, "learning_rate": 0.003, "loss": 4.2316, "step": 2534 }, { "epoch": 0.02535, "grad_norm": 0.8934366837294853, "learning_rate": 0.003, "loss": 4.2235, "step": 2535 }, { "epoch": 0.02536, "grad_norm": 0.8077546891906852, "learning_rate": 0.003, "loss": 4.2184, "step": 2536 }, { "epoch": 0.02537, "grad_norm": 0.6532019172502265, "learning_rate": 0.003, "loss": 4.2093, "step": 2537 }, { "epoch": 0.02538, "grad_norm": 0.6905511076952489, "learning_rate": 0.003, "loss": 4.184, "step": 2538 }, { "epoch": 0.02539, "grad_norm": 0.7499705806639466, "learning_rate": 0.003, "loss": 4.2378, "step": 2539 }, { "epoch": 0.0254, "grad_norm": 0.6787160415395472, "learning_rate": 0.003, "loss": 4.2251, "step": 2540 }, { "epoch": 0.02541, "grad_norm": 0.6164160513405081, "learning_rate": 0.003, "loss": 4.2157, "step": 2541 }, { "epoch": 0.02542, "grad_norm": 0.6175874501025589, "learning_rate": 0.003, "loss": 4.1908, "step": 2542 }, { "epoch": 0.02543, "grad_norm": 0.5640476735952517, "learning_rate": 0.003, "loss": 4.2185, "step": 2543 }, { "epoch": 0.02544, "grad_norm": 0.6088525590071204, "learning_rate": 0.003, "loss": 4.2062, "step": 2544 }, { "epoch": 0.02545, "grad_norm": 0.5774041309556538, "learning_rate": 0.003, "loss": 4.1853, "step": 2545 }, { "epoch": 0.02546, "grad_norm": 0.5080999994690095, "learning_rate": 0.003, "loss": 4.194, "step": 2546 }, { "epoch": 0.02547, "grad_norm": 0.491841642905056, "learning_rate": 0.003, "loss": 4.2303, "step": 2547 }, { "epoch": 0.02548, "grad_norm": 0.5507165108887961, "learning_rate": 0.003, "loss": 4.2283, "step": 2548 }, { "epoch": 0.02549, "grad_norm": 0.6923243100094933, "learning_rate": 0.003, "loss": 4.2227, "step": 2549 }, { "epoch": 0.0255, "grad_norm": 0.7817347310990386, "learning_rate": 0.003, "loss": 4.1886, "step": 2550 }, { "epoch": 0.02551, "grad_norm": 0.7447240542500363, "learning_rate": 0.003, "loss": 4.2189, "step": 2551 }, { "epoch": 0.02552, "grad_norm": 0.6307913070627245, "learning_rate": 0.003, "loss": 4.1874, "step": 2552 }, { "epoch": 0.02553, "grad_norm": 0.6438248127745729, "learning_rate": 0.003, "loss": 4.2512, "step": 2553 }, { "epoch": 0.02554, "grad_norm": 0.5917787379710305, "learning_rate": 0.003, "loss": 4.1983, "step": 2554 }, { "epoch": 0.02555, "grad_norm": 0.6080534723714437, "learning_rate": 0.003, "loss": 4.1896, "step": 2555 }, { "epoch": 0.02556, "grad_norm": 0.6279427379491616, "learning_rate": 0.003, "loss": 4.2002, "step": 2556 }, { "epoch": 0.02557, "grad_norm": 0.5766106602168388, "learning_rate": 0.003, "loss": 4.2029, "step": 2557 }, { "epoch": 0.02558, "grad_norm": 0.5614919439663087, "learning_rate": 0.003, "loss": 4.2176, "step": 2558 }, { "epoch": 0.02559, "grad_norm": 0.5870187050191585, "learning_rate": 0.003, "loss": 4.2144, "step": 2559 }, { "epoch": 0.0256, "grad_norm": 0.5866194992858084, "learning_rate": 0.003, "loss": 4.1883, "step": 2560 }, { "epoch": 0.02561, "grad_norm": 0.56223387782931, "learning_rate": 0.003, "loss": 4.1893, "step": 2561 }, { "epoch": 0.02562, "grad_norm": 0.4873971454432025, "learning_rate": 0.003, "loss": 4.203, "step": 2562 }, { "epoch": 0.02563, "grad_norm": 0.6113475215270017, "learning_rate": 0.003, "loss": 4.1898, "step": 2563 }, { "epoch": 0.02564, "grad_norm": 0.7598159388308933, "learning_rate": 0.003, "loss": 4.2128, "step": 2564 }, { "epoch": 0.02565, "grad_norm": 0.9584592936312263, "learning_rate": 0.003, "loss": 4.2023, "step": 2565 }, { "epoch": 0.02566, "grad_norm": 1.059905791929432, "learning_rate": 0.003, "loss": 4.231, "step": 2566 }, { "epoch": 0.02567, "grad_norm": 0.6761577058111206, "learning_rate": 0.003, "loss": 4.219, "step": 2567 }, { "epoch": 0.02568, "grad_norm": 0.582893785111441, "learning_rate": 0.003, "loss": 4.2104, "step": 2568 }, { "epoch": 0.02569, "grad_norm": 0.7274372672775176, "learning_rate": 0.003, "loss": 4.1941, "step": 2569 }, { "epoch": 0.0257, "grad_norm": 0.6382797975462534, "learning_rate": 0.003, "loss": 4.1988, "step": 2570 }, { "epoch": 0.02571, "grad_norm": 0.5842618410643182, "learning_rate": 0.003, "loss": 4.2168, "step": 2571 }, { "epoch": 0.02572, "grad_norm": 0.5733806386596177, "learning_rate": 0.003, "loss": 4.1929, "step": 2572 }, { "epoch": 0.02573, "grad_norm": 0.6098335904085562, "learning_rate": 0.003, "loss": 4.2012, "step": 2573 }, { "epoch": 0.02574, "grad_norm": 0.7180717115741516, "learning_rate": 0.003, "loss": 4.233, "step": 2574 }, { "epoch": 0.02575, "grad_norm": 0.7732503452131877, "learning_rate": 0.003, "loss": 4.1693, "step": 2575 }, { "epoch": 0.02576, "grad_norm": 0.8039085737347494, "learning_rate": 0.003, "loss": 4.1792, "step": 2576 }, { "epoch": 0.02577, "grad_norm": 0.7708873473006739, "learning_rate": 0.003, "loss": 4.183, "step": 2577 }, { "epoch": 0.02578, "grad_norm": 0.6416087124695665, "learning_rate": 0.003, "loss": 4.2007, "step": 2578 }, { "epoch": 0.02579, "grad_norm": 0.6401520636312507, "learning_rate": 0.003, "loss": 4.1503, "step": 2579 }, { "epoch": 0.0258, "grad_norm": 0.609541740323575, "learning_rate": 0.003, "loss": 4.2065, "step": 2580 }, { "epoch": 0.02581, "grad_norm": 0.6036763081432622, "learning_rate": 0.003, "loss": 4.2268, "step": 2581 }, { "epoch": 0.02582, "grad_norm": 0.5959601922482883, "learning_rate": 0.003, "loss": 4.2238, "step": 2582 }, { "epoch": 0.02583, "grad_norm": 0.6183189840573183, "learning_rate": 0.003, "loss": 4.1962, "step": 2583 }, { "epoch": 0.02584, "grad_norm": 0.7087135376074408, "learning_rate": 0.003, "loss": 4.2138, "step": 2584 }, { "epoch": 0.02585, "grad_norm": 0.6812145006805832, "learning_rate": 0.003, "loss": 4.1952, "step": 2585 }, { "epoch": 0.02586, "grad_norm": 0.7034361159454041, "learning_rate": 0.003, "loss": 4.2173, "step": 2586 }, { "epoch": 0.02587, "grad_norm": 0.7458480631965212, "learning_rate": 0.003, "loss": 4.2079, "step": 2587 }, { "epoch": 0.02588, "grad_norm": 0.6631960688574444, "learning_rate": 0.003, "loss": 4.2037, "step": 2588 }, { "epoch": 0.02589, "grad_norm": 0.6894974904796883, "learning_rate": 0.003, "loss": 4.2122, "step": 2589 }, { "epoch": 0.0259, "grad_norm": 0.6129883156712106, "learning_rate": 0.003, "loss": 4.2205, "step": 2590 }, { "epoch": 0.02591, "grad_norm": 0.52233908649411, "learning_rate": 0.003, "loss": 4.1499, "step": 2591 }, { "epoch": 0.02592, "grad_norm": 0.5621629848199292, "learning_rate": 0.003, "loss": 4.1958, "step": 2592 }, { "epoch": 0.02593, "grad_norm": 0.579018019134519, "learning_rate": 0.003, "loss": 4.1981, "step": 2593 }, { "epoch": 0.02594, "grad_norm": 0.7055147915095833, "learning_rate": 0.003, "loss": 4.2202, "step": 2594 }, { "epoch": 0.02595, "grad_norm": 0.9044844479304169, "learning_rate": 0.003, "loss": 4.182, "step": 2595 }, { "epoch": 0.02596, "grad_norm": 0.9468656207805218, "learning_rate": 0.003, "loss": 4.2266, "step": 2596 }, { "epoch": 0.02597, "grad_norm": 0.6963050525881186, "learning_rate": 0.003, "loss": 4.1873, "step": 2597 }, { "epoch": 0.02598, "grad_norm": 0.6825938651999739, "learning_rate": 0.003, "loss": 4.2289, "step": 2598 }, { "epoch": 0.02599, "grad_norm": 0.7880468862794131, "learning_rate": 0.003, "loss": 4.225, "step": 2599 }, { "epoch": 0.026, "grad_norm": 0.8906440360951334, "learning_rate": 0.003, "loss": 4.2307, "step": 2600 }, { "epoch": 0.02601, "grad_norm": 0.8491652376595475, "learning_rate": 0.003, "loss": 4.2355, "step": 2601 }, { "epoch": 0.02602, "grad_norm": 0.7504592913224991, "learning_rate": 0.003, "loss": 4.2094, "step": 2602 }, { "epoch": 0.02603, "grad_norm": 0.6893393420157942, "learning_rate": 0.003, "loss": 4.2031, "step": 2603 }, { "epoch": 0.02604, "grad_norm": 0.8097986318712986, "learning_rate": 0.003, "loss": 4.1981, "step": 2604 }, { "epoch": 0.02605, "grad_norm": 0.9104130090907385, "learning_rate": 0.003, "loss": 4.2234, "step": 2605 }, { "epoch": 0.02606, "grad_norm": 1.0310289052047719, "learning_rate": 0.003, "loss": 4.2436, "step": 2606 }, { "epoch": 0.02607, "grad_norm": 0.9582413171497328, "learning_rate": 0.003, "loss": 4.2397, "step": 2607 }, { "epoch": 0.02608, "grad_norm": 0.8048575866472404, "learning_rate": 0.003, "loss": 4.2123, "step": 2608 }, { "epoch": 0.02609, "grad_norm": 0.8595866026929856, "learning_rate": 0.003, "loss": 4.2586, "step": 2609 }, { "epoch": 0.0261, "grad_norm": 0.823900371529962, "learning_rate": 0.003, "loss": 4.2068, "step": 2610 }, { "epoch": 0.02611, "grad_norm": 0.7141217984189516, "learning_rate": 0.003, "loss": 4.1945, "step": 2611 }, { "epoch": 0.02612, "grad_norm": 0.716568582474741, "learning_rate": 0.003, "loss": 4.2324, "step": 2612 }, { "epoch": 0.02613, "grad_norm": 0.6977866203993734, "learning_rate": 0.003, "loss": 4.2227, "step": 2613 }, { "epoch": 0.02614, "grad_norm": 0.6120459222412155, "learning_rate": 0.003, "loss": 4.1988, "step": 2614 }, { "epoch": 0.02615, "grad_norm": 0.48978414596737385, "learning_rate": 0.003, "loss": 4.185, "step": 2615 }, { "epoch": 0.02616, "grad_norm": 0.4111484941092938, "learning_rate": 0.003, "loss": 4.193, "step": 2616 }, { "epoch": 0.02617, "grad_norm": 0.4314798871804994, "learning_rate": 0.003, "loss": 4.1808, "step": 2617 }, { "epoch": 0.02618, "grad_norm": 0.45257700858495165, "learning_rate": 0.003, "loss": 4.2091, "step": 2618 }, { "epoch": 0.02619, "grad_norm": 0.43820325303953606, "learning_rate": 0.003, "loss": 4.2202, "step": 2619 }, { "epoch": 0.0262, "grad_norm": 0.48135056397229203, "learning_rate": 0.003, "loss": 4.1827, "step": 2620 }, { "epoch": 0.02621, "grad_norm": 0.5541701604207973, "learning_rate": 0.003, "loss": 4.2081, "step": 2621 }, { "epoch": 0.02622, "grad_norm": 0.6217167828478687, "learning_rate": 0.003, "loss": 4.2095, "step": 2622 }, { "epoch": 0.02623, "grad_norm": 0.5922780087971841, "learning_rate": 0.003, "loss": 4.2099, "step": 2623 }, { "epoch": 0.02624, "grad_norm": 0.6090506282888395, "learning_rate": 0.003, "loss": 4.246, "step": 2624 }, { "epoch": 0.02625, "grad_norm": 0.5994929489144406, "learning_rate": 0.003, "loss": 4.2398, "step": 2625 }, { "epoch": 0.02626, "grad_norm": 0.6828660510269309, "learning_rate": 0.003, "loss": 4.1745, "step": 2626 }, { "epoch": 0.02627, "grad_norm": 0.7689467228473569, "learning_rate": 0.003, "loss": 4.1978, "step": 2627 }, { "epoch": 0.02628, "grad_norm": 0.772945921723307, "learning_rate": 0.003, "loss": 4.2279, "step": 2628 }, { "epoch": 0.02629, "grad_norm": 0.7998878360923921, "learning_rate": 0.003, "loss": 4.1718, "step": 2629 }, { "epoch": 0.0263, "grad_norm": 0.8071005154576086, "learning_rate": 0.003, "loss": 4.2029, "step": 2630 }, { "epoch": 0.02631, "grad_norm": 0.7063928050043093, "learning_rate": 0.003, "loss": 4.2214, "step": 2631 }, { "epoch": 0.02632, "grad_norm": 0.5358237009716528, "learning_rate": 0.003, "loss": 4.1777, "step": 2632 }, { "epoch": 0.02633, "grad_norm": 0.5777990078306479, "learning_rate": 0.003, "loss": 4.1915, "step": 2633 }, { "epoch": 0.02634, "grad_norm": 0.5758302081275357, "learning_rate": 0.003, "loss": 4.2102, "step": 2634 }, { "epoch": 0.02635, "grad_norm": 0.5582382698428011, "learning_rate": 0.003, "loss": 4.2059, "step": 2635 }, { "epoch": 0.02636, "grad_norm": 0.5270921433631331, "learning_rate": 0.003, "loss": 4.1886, "step": 2636 }, { "epoch": 0.02637, "grad_norm": 0.4952392838330578, "learning_rate": 0.003, "loss": 4.2209, "step": 2637 }, { "epoch": 0.02638, "grad_norm": 0.42063663579214783, "learning_rate": 0.003, "loss": 4.1814, "step": 2638 }, { "epoch": 0.02639, "grad_norm": 0.38387035795548136, "learning_rate": 0.003, "loss": 4.1685, "step": 2639 }, { "epoch": 0.0264, "grad_norm": 0.39356773800785, "learning_rate": 0.003, "loss": 4.1664, "step": 2640 }, { "epoch": 0.02641, "grad_norm": 0.4027429798510006, "learning_rate": 0.003, "loss": 4.1789, "step": 2641 }, { "epoch": 0.02642, "grad_norm": 0.4954271688101395, "learning_rate": 0.003, "loss": 4.1892, "step": 2642 }, { "epoch": 0.02643, "grad_norm": 0.544589775918662, "learning_rate": 0.003, "loss": 4.176, "step": 2643 }, { "epoch": 0.02644, "grad_norm": 0.5577274451115164, "learning_rate": 0.003, "loss": 4.1927, "step": 2644 }, { "epoch": 0.02645, "grad_norm": 0.7183426191932024, "learning_rate": 0.003, "loss": 4.2101, "step": 2645 }, { "epoch": 0.02646, "grad_norm": 0.9615412387915055, "learning_rate": 0.003, "loss": 4.1833, "step": 2646 }, { "epoch": 0.02647, "grad_norm": 1.0740028901393692, "learning_rate": 0.003, "loss": 4.2196, "step": 2647 }, { "epoch": 0.02648, "grad_norm": 0.7345014390620105, "learning_rate": 0.003, "loss": 4.2256, "step": 2648 }, { "epoch": 0.02649, "grad_norm": 0.6618619800661987, "learning_rate": 0.003, "loss": 4.2112, "step": 2649 }, { "epoch": 0.0265, "grad_norm": 0.7347893947046258, "learning_rate": 0.003, "loss": 4.1759, "step": 2650 }, { "epoch": 0.02651, "grad_norm": 0.6484062784740668, "learning_rate": 0.003, "loss": 4.1885, "step": 2651 }, { "epoch": 0.02652, "grad_norm": 0.7667553931843348, "learning_rate": 0.003, "loss": 4.2255, "step": 2652 }, { "epoch": 0.02653, "grad_norm": 0.7289832175113301, "learning_rate": 0.003, "loss": 4.1797, "step": 2653 }, { "epoch": 0.02654, "grad_norm": 0.7103736472197925, "learning_rate": 0.003, "loss": 4.1924, "step": 2654 }, { "epoch": 0.02655, "grad_norm": 0.7580675808718691, "learning_rate": 0.003, "loss": 4.2053, "step": 2655 }, { "epoch": 0.02656, "grad_norm": 0.7969812553544864, "learning_rate": 0.003, "loss": 4.1802, "step": 2656 }, { "epoch": 0.02657, "grad_norm": 0.6347600401053554, "learning_rate": 0.003, "loss": 4.2057, "step": 2657 }, { "epoch": 0.02658, "grad_norm": 0.5896722976882536, "learning_rate": 0.003, "loss": 4.2116, "step": 2658 }, { "epoch": 0.02659, "grad_norm": 0.651082411912306, "learning_rate": 0.003, "loss": 4.2054, "step": 2659 }, { "epoch": 0.0266, "grad_norm": 0.6950508733673917, "learning_rate": 0.003, "loss": 4.1947, "step": 2660 }, { "epoch": 0.02661, "grad_norm": 0.7309821342606786, "learning_rate": 0.003, "loss": 4.2168, "step": 2661 }, { "epoch": 0.02662, "grad_norm": 0.8443517188841829, "learning_rate": 0.003, "loss": 4.2102, "step": 2662 }, { "epoch": 0.02663, "grad_norm": 0.9107871566375996, "learning_rate": 0.003, "loss": 4.1952, "step": 2663 }, { "epoch": 0.02664, "grad_norm": 0.8948636609916674, "learning_rate": 0.003, "loss": 4.2015, "step": 2664 }, { "epoch": 0.02665, "grad_norm": 0.7895030310092762, "learning_rate": 0.003, "loss": 4.2447, "step": 2665 }, { "epoch": 0.02666, "grad_norm": 0.7544169624146947, "learning_rate": 0.003, "loss": 4.2351, "step": 2666 }, { "epoch": 0.02667, "grad_norm": 0.7917512909838754, "learning_rate": 0.003, "loss": 4.2156, "step": 2667 }, { "epoch": 0.02668, "grad_norm": 0.8660435271921453, "learning_rate": 0.003, "loss": 4.1906, "step": 2668 }, { "epoch": 0.02669, "grad_norm": 0.8211806952379437, "learning_rate": 0.003, "loss": 4.2092, "step": 2669 }, { "epoch": 0.0267, "grad_norm": 0.7376659322382964, "learning_rate": 0.003, "loss": 4.2095, "step": 2670 }, { "epoch": 0.02671, "grad_norm": 0.7319080249702333, "learning_rate": 0.003, "loss": 4.2252, "step": 2671 }, { "epoch": 0.02672, "grad_norm": 0.674274482359015, "learning_rate": 0.003, "loss": 4.2161, "step": 2672 }, { "epoch": 0.02673, "grad_norm": 0.7262456950172295, "learning_rate": 0.003, "loss": 4.2152, "step": 2673 }, { "epoch": 0.02674, "grad_norm": 0.7478436146050179, "learning_rate": 0.003, "loss": 4.1824, "step": 2674 }, { "epoch": 0.02675, "grad_norm": 0.6974424835283173, "learning_rate": 0.003, "loss": 4.1794, "step": 2675 }, { "epoch": 0.02676, "grad_norm": 0.7773148175185325, "learning_rate": 0.003, "loss": 4.2023, "step": 2676 }, { "epoch": 0.02677, "grad_norm": 0.9696205190703804, "learning_rate": 0.003, "loss": 4.2371, "step": 2677 }, { "epoch": 0.02678, "grad_norm": 0.9527960429936483, "learning_rate": 0.003, "loss": 4.237, "step": 2678 }, { "epoch": 0.02679, "grad_norm": 0.686114258952289, "learning_rate": 0.003, "loss": 4.1997, "step": 2679 }, { "epoch": 0.0268, "grad_norm": 0.587978184638772, "learning_rate": 0.003, "loss": 4.2094, "step": 2680 }, { "epoch": 0.02681, "grad_norm": 0.5919245996634437, "learning_rate": 0.003, "loss": 4.2288, "step": 2681 }, { "epoch": 0.02682, "grad_norm": 0.5747684944993391, "learning_rate": 0.003, "loss": 4.2039, "step": 2682 }, { "epoch": 0.02683, "grad_norm": 0.49692411048438434, "learning_rate": 0.003, "loss": 4.2231, "step": 2683 }, { "epoch": 0.02684, "grad_norm": 0.42371991981886814, "learning_rate": 0.003, "loss": 4.1939, "step": 2684 }, { "epoch": 0.02685, "grad_norm": 0.43270831648127167, "learning_rate": 0.003, "loss": 4.1987, "step": 2685 }, { "epoch": 0.02686, "grad_norm": 0.4373309691769235, "learning_rate": 0.003, "loss": 4.1811, "step": 2686 }, { "epoch": 0.02687, "grad_norm": 0.4372836741597173, "learning_rate": 0.003, "loss": 4.189, "step": 2687 }, { "epoch": 0.02688, "grad_norm": 0.49637578837338964, "learning_rate": 0.003, "loss": 4.1657, "step": 2688 }, { "epoch": 0.02689, "grad_norm": 0.42425016692512946, "learning_rate": 0.003, "loss": 4.197, "step": 2689 }, { "epoch": 0.0269, "grad_norm": 0.3687359570047172, "learning_rate": 0.003, "loss": 4.199, "step": 2690 }, { "epoch": 0.02691, "grad_norm": 0.422533215672721, "learning_rate": 0.003, "loss": 4.1917, "step": 2691 }, { "epoch": 0.02692, "grad_norm": 0.36876790177094304, "learning_rate": 0.003, "loss": 4.1944, "step": 2692 }, { "epoch": 0.02693, "grad_norm": 0.33344364158726264, "learning_rate": 0.003, "loss": 4.2054, "step": 2693 }, { "epoch": 0.02694, "grad_norm": 0.35193571774116406, "learning_rate": 0.003, "loss": 4.1673, "step": 2694 }, { "epoch": 0.02695, "grad_norm": 0.37618933218650874, "learning_rate": 0.003, "loss": 4.1828, "step": 2695 }, { "epoch": 0.02696, "grad_norm": 0.41130959986438415, "learning_rate": 0.003, "loss": 4.185, "step": 2696 }, { "epoch": 0.02697, "grad_norm": 0.5181896124624418, "learning_rate": 0.003, "loss": 4.198, "step": 2697 }, { "epoch": 0.02698, "grad_norm": 0.8288766752985389, "learning_rate": 0.003, "loss": 4.1898, "step": 2698 }, { "epoch": 0.02699, "grad_norm": 1.1773125298816605, "learning_rate": 0.003, "loss": 4.23, "step": 2699 }, { "epoch": 0.027, "grad_norm": 0.7553245674542797, "learning_rate": 0.003, "loss": 4.1855, "step": 2700 }, { "epoch": 0.02701, "grad_norm": 0.8921172592412265, "learning_rate": 0.003, "loss": 4.1838, "step": 2701 }, { "epoch": 0.02702, "grad_norm": 1.0764387127470998, "learning_rate": 0.003, "loss": 4.2606, "step": 2702 }, { "epoch": 0.02703, "grad_norm": 0.9044993098249169, "learning_rate": 0.003, "loss": 4.2449, "step": 2703 }, { "epoch": 0.02704, "grad_norm": 0.8806690266182866, "learning_rate": 0.003, "loss": 4.1921, "step": 2704 }, { "epoch": 0.02705, "grad_norm": 0.8651796636346343, "learning_rate": 0.003, "loss": 4.2521, "step": 2705 }, { "epoch": 0.02706, "grad_norm": 0.8193327778471594, "learning_rate": 0.003, "loss": 4.2075, "step": 2706 }, { "epoch": 0.02707, "grad_norm": 0.8383738553676314, "learning_rate": 0.003, "loss": 4.2115, "step": 2707 }, { "epoch": 0.02708, "grad_norm": 0.7927971133508214, "learning_rate": 0.003, "loss": 4.2392, "step": 2708 }, { "epoch": 0.02709, "grad_norm": 0.702011577126357, "learning_rate": 0.003, "loss": 4.2044, "step": 2709 }, { "epoch": 0.0271, "grad_norm": 0.7610112656844941, "learning_rate": 0.003, "loss": 4.2165, "step": 2710 }, { "epoch": 0.02711, "grad_norm": 0.7565309559289577, "learning_rate": 0.003, "loss": 4.1983, "step": 2711 }, { "epoch": 0.02712, "grad_norm": 0.7177786599726909, "learning_rate": 0.003, "loss": 4.2091, "step": 2712 }, { "epoch": 0.02713, "grad_norm": 0.6187976568863948, "learning_rate": 0.003, "loss": 4.1995, "step": 2713 }, { "epoch": 0.02714, "grad_norm": 0.5823945630189209, "learning_rate": 0.003, "loss": 4.1941, "step": 2714 }, { "epoch": 0.02715, "grad_norm": 0.6590346464153963, "learning_rate": 0.003, "loss": 4.1907, "step": 2715 }, { "epoch": 0.02716, "grad_norm": 0.8913115921228741, "learning_rate": 0.003, "loss": 4.2092, "step": 2716 }, { "epoch": 0.02717, "grad_norm": 0.9991228875759847, "learning_rate": 0.003, "loss": 4.219, "step": 2717 }, { "epoch": 0.02718, "grad_norm": 0.9312424963750244, "learning_rate": 0.003, "loss": 4.2354, "step": 2718 }, { "epoch": 0.02719, "grad_norm": 0.6956685262676899, "learning_rate": 0.003, "loss": 4.2005, "step": 2719 }, { "epoch": 0.0272, "grad_norm": 0.6830262491518131, "learning_rate": 0.003, "loss": 4.192, "step": 2720 }, { "epoch": 0.02721, "grad_norm": 0.5790742761201393, "learning_rate": 0.003, "loss": 4.2022, "step": 2721 }, { "epoch": 0.02722, "grad_norm": 0.6475692617370225, "learning_rate": 0.003, "loss": 4.2016, "step": 2722 }, { "epoch": 0.02723, "grad_norm": 0.6175956588274222, "learning_rate": 0.003, "loss": 4.2065, "step": 2723 }, { "epoch": 0.02724, "grad_norm": 0.5457779888221875, "learning_rate": 0.003, "loss": 4.2041, "step": 2724 }, { "epoch": 0.02725, "grad_norm": 0.5026905482131955, "learning_rate": 0.003, "loss": 4.1969, "step": 2725 }, { "epoch": 0.02726, "grad_norm": 0.42536659188846526, "learning_rate": 0.003, "loss": 4.1647, "step": 2726 }, { "epoch": 0.02727, "grad_norm": 0.5003680404401011, "learning_rate": 0.003, "loss": 4.201, "step": 2727 }, { "epoch": 0.02728, "grad_norm": 0.48640002941271926, "learning_rate": 0.003, "loss": 4.1833, "step": 2728 }, { "epoch": 0.02729, "grad_norm": 0.41280683340266905, "learning_rate": 0.003, "loss": 4.2125, "step": 2729 }, { "epoch": 0.0273, "grad_norm": 0.43446915053933527, "learning_rate": 0.003, "loss": 4.1856, "step": 2730 }, { "epoch": 0.02731, "grad_norm": 0.51314133885114, "learning_rate": 0.003, "loss": 4.1847, "step": 2731 }, { "epoch": 0.02732, "grad_norm": 0.5875155983292144, "learning_rate": 0.003, "loss": 4.1792, "step": 2732 }, { "epoch": 0.02733, "grad_norm": 0.7435494959956005, "learning_rate": 0.003, "loss": 4.2133, "step": 2733 }, { "epoch": 0.02734, "grad_norm": 0.8318623947885522, "learning_rate": 0.003, "loss": 4.1703, "step": 2734 }, { "epoch": 0.02735, "grad_norm": 0.7270117460217965, "learning_rate": 0.003, "loss": 4.2139, "step": 2735 }, { "epoch": 0.02736, "grad_norm": 0.5421807988773258, "learning_rate": 0.003, "loss": 4.1837, "step": 2736 }, { "epoch": 0.02737, "grad_norm": 0.5828799726606484, "learning_rate": 0.003, "loss": 4.1756, "step": 2737 }, { "epoch": 0.02738, "grad_norm": 0.6293605543493668, "learning_rate": 0.003, "loss": 4.2025, "step": 2738 }, { "epoch": 0.02739, "grad_norm": 0.5663805709619927, "learning_rate": 0.003, "loss": 4.1907, "step": 2739 }, { "epoch": 0.0274, "grad_norm": 0.5687899337220772, "learning_rate": 0.003, "loss": 4.1804, "step": 2740 }, { "epoch": 0.02741, "grad_norm": 0.6075448196131129, "learning_rate": 0.003, "loss": 4.1946, "step": 2741 }, { "epoch": 0.02742, "grad_norm": 0.550237016515108, "learning_rate": 0.003, "loss": 4.1804, "step": 2742 }, { "epoch": 0.02743, "grad_norm": 0.6055901207765589, "learning_rate": 0.003, "loss": 4.2013, "step": 2743 }, { "epoch": 0.02744, "grad_norm": 0.7300871101889295, "learning_rate": 0.003, "loss": 4.1884, "step": 2744 }, { "epoch": 0.02745, "grad_norm": 0.8259955139853259, "learning_rate": 0.003, "loss": 4.2039, "step": 2745 }, { "epoch": 0.02746, "grad_norm": 0.83396545170588, "learning_rate": 0.003, "loss": 4.1856, "step": 2746 }, { "epoch": 0.02747, "grad_norm": 0.729485971791691, "learning_rate": 0.003, "loss": 4.2152, "step": 2747 }, { "epoch": 0.02748, "grad_norm": 0.695156151246541, "learning_rate": 0.003, "loss": 4.1664, "step": 2748 }, { "epoch": 0.02749, "grad_norm": 0.6271520188600127, "learning_rate": 0.003, "loss": 4.1715, "step": 2749 }, { "epoch": 0.0275, "grad_norm": 0.6580176171244524, "learning_rate": 0.003, "loss": 4.2067, "step": 2750 }, { "epoch": 0.02751, "grad_norm": 0.672675352925045, "learning_rate": 0.003, "loss": 4.2192, "step": 2751 }, { "epoch": 0.02752, "grad_norm": 0.6249956014069801, "learning_rate": 0.003, "loss": 4.2068, "step": 2752 }, { "epoch": 0.02753, "grad_norm": 0.6422052590772455, "learning_rate": 0.003, "loss": 4.1858, "step": 2753 }, { "epoch": 0.02754, "grad_norm": 0.6380940449715866, "learning_rate": 0.003, "loss": 4.1719, "step": 2754 }, { "epoch": 0.02755, "grad_norm": 0.5972581452735276, "learning_rate": 0.003, "loss": 4.1688, "step": 2755 }, { "epoch": 0.02756, "grad_norm": 0.6077948327385506, "learning_rate": 0.003, "loss": 4.1967, "step": 2756 }, { "epoch": 0.02757, "grad_norm": 0.7032781486647851, "learning_rate": 0.003, "loss": 4.2127, "step": 2757 }, { "epoch": 0.02758, "grad_norm": 0.793287683394734, "learning_rate": 0.003, "loss": 4.1637, "step": 2758 }, { "epoch": 0.02759, "grad_norm": 0.9418904888299408, "learning_rate": 0.003, "loss": 4.2024, "step": 2759 }, { "epoch": 0.0276, "grad_norm": 0.9351479329737886, "learning_rate": 0.003, "loss": 4.215, "step": 2760 }, { "epoch": 0.02761, "grad_norm": 0.8139578363148168, "learning_rate": 0.003, "loss": 4.2282, "step": 2761 }, { "epoch": 0.02762, "grad_norm": 0.6757269439646322, "learning_rate": 0.003, "loss": 4.1918, "step": 2762 }, { "epoch": 0.02763, "grad_norm": 0.673672124363711, "learning_rate": 0.003, "loss": 4.2006, "step": 2763 }, { "epoch": 0.02764, "grad_norm": 0.8103484124048353, "learning_rate": 0.003, "loss": 4.199, "step": 2764 }, { "epoch": 0.02765, "grad_norm": 0.861735499978179, "learning_rate": 0.003, "loss": 4.2084, "step": 2765 }, { "epoch": 0.02766, "grad_norm": 0.7923213801421585, "learning_rate": 0.003, "loss": 4.1751, "step": 2766 }, { "epoch": 0.02767, "grad_norm": 0.7631227515735746, "learning_rate": 0.003, "loss": 4.2302, "step": 2767 }, { "epoch": 0.02768, "grad_norm": 0.8386215859490249, "learning_rate": 0.003, "loss": 4.1676, "step": 2768 }, { "epoch": 0.02769, "grad_norm": 0.8716391723421844, "learning_rate": 0.003, "loss": 4.2312, "step": 2769 }, { "epoch": 0.0277, "grad_norm": 0.79734786858364, "learning_rate": 0.003, "loss": 4.1984, "step": 2770 }, { "epoch": 0.02771, "grad_norm": 0.7636865958720973, "learning_rate": 0.003, "loss": 4.187, "step": 2771 }, { "epoch": 0.02772, "grad_norm": 0.7711684731670819, "learning_rate": 0.003, "loss": 4.1786, "step": 2772 }, { "epoch": 0.02773, "grad_norm": 0.6855679524292124, "learning_rate": 0.003, "loss": 4.2122, "step": 2773 }, { "epoch": 0.02774, "grad_norm": 0.6373232969577011, "learning_rate": 0.003, "loss": 4.2297, "step": 2774 }, { "epoch": 0.02775, "grad_norm": 0.5808981715198482, "learning_rate": 0.003, "loss": 4.1967, "step": 2775 }, { "epoch": 0.02776, "grad_norm": 0.551216477733259, "learning_rate": 0.003, "loss": 4.1912, "step": 2776 }, { "epoch": 0.02777, "grad_norm": 0.5502623559066758, "learning_rate": 0.003, "loss": 4.1695, "step": 2777 }, { "epoch": 0.02778, "grad_norm": 0.5471574038460382, "learning_rate": 0.003, "loss": 4.209, "step": 2778 }, { "epoch": 0.02779, "grad_norm": 0.5740086450201776, "learning_rate": 0.003, "loss": 4.1802, "step": 2779 }, { "epoch": 0.0278, "grad_norm": 0.6603722539886849, "learning_rate": 0.003, "loss": 4.1784, "step": 2780 }, { "epoch": 0.02781, "grad_norm": 0.7045319397684864, "learning_rate": 0.003, "loss": 4.1629, "step": 2781 }, { "epoch": 0.02782, "grad_norm": 0.7384112585585232, "learning_rate": 0.003, "loss": 4.189, "step": 2782 }, { "epoch": 0.02783, "grad_norm": 0.7302997601128179, "learning_rate": 0.003, "loss": 4.1951, "step": 2783 }, { "epoch": 0.02784, "grad_norm": 0.623059472233842, "learning_rate": 0.003, "loss": 4.1896, "step": 2784 }, { "epoch": 0.02785, "grad_norm": 0.6333396786278916, "learning_rate": 0.003, "loss": 4.2038, "step": 2785 }, { "epoch": 0.02786, "grad_norm": 0.7323878741557076, "learning_rate": 0.003, "loss": 4.2023, "step": 2786 }, { "epoch": 0.02787, "grad_norm": 0.7244716928344251, "learning_rate": 0.003, "loss": 4.2035, "step": 2787 }, { "epoch": 0.02788, "grad_norm": 0.6542769748417147, "learning_rate": 0.003, "loss": 4.1892, "step": 2788 }, { "epoch": 0.02789, "grad_norm": 0.6574058601306716, "learning_rate": 0.003, "loss": 4.1939, "step": 2789 }, { "epoch": 0.0279, "grad_norm": 0.7081427842141735, "learning_rate": 0.003, "loss": 4.1695, "step": 2790 }, { "epoch": 0.02791, "grad_norm": 0.6942117835439664, "learning_rate": 0.003, "loss": 4.181, "step": 2791 }, { "epoch": 0.02792, "grad_norm": 0.6067557855062717, "learning_rate": 0.003, "loss": 4.1954, "step": 2792 }, { "epoch": 0.02793, "grad_norm": 0.5947324428169551, "learning_rate": 0.003, "loss": 4.1906, "step": 2793 }, { "epoch": 0.02794, "grad_norm": 0.5401502710295608, "learning_rate": 0.003, "loss": 4.1858, "step": 2794 }, { "epoch": 0.02795, "grad_norm": 0.5028686206702352, "learning_rate": 0.003, "loss": 4.2, "step": 2795 }, { "epoch": 0.02796, "grad_norm": 0.49627952868559183, "learning_rate": 0.003, "loss": 4.191, "step": 2796 }, { "epoch": 0.02797, "grad_norm": 0.4876291995281765, "learning_rate": 0.003, "loss": 4.2019, "step": 2797 }, { "epoch": 0.02798, "grad_norm": 0.5294169569466303, "learning_rate": 0.003, "loss": 4.1979, "step": 2798 }, { "epoch": 0.02799, "grad_norm": 0.5927669437619445, "learning_rate": 0.003, "loss": 4.1858, "step": 2799 }, { "epoch": 0.028, "grad_norm": 0.6682805931240281, "learning_rate": 0.003, "loss": 4.1988, "step": 2800 }, { "epoch": 0.02801, "grad_norm": 0.9068693661645921, "learning_rate": 0.003, "loss": 4.1815, "step": 2801 }, { "epoch": 0.02802, "grad_norm": 1.0254407756501984, "learning_rate": 0.003, "loss": 4.1982, "step": 2802 }, { "epoch": 0.02803, "grad_norm": 0.8128875626073906, "learning_rate": 0.003, "loss": 4.2114, "step": 2803 }, { "epoch": 0.02804, "grad_norm": 0.7353680218352796, "learning_rate": 0.003, "loss": 4.18, "step": 2804 }, { "epoch": 0.02805, "grad_norm": 0.7856799021536293, "learning_rate": 0.003, "loss": 4.222, "step": 2805 }, { "epoch": 0.02806, "grad_norm": 0.7319188472007512, "learning_rate": 0.003, "loss": 4.1915, "step": 2806 }, { "epoch": 0.02807, "grad_norm": 0.6281145778193726, "learning_rate": 0.003, "loss": 4.1913, "step": 2807 }, { "epoch": 0.02808, "grad_norm": 0.6007996487222561, "learning_rate": 0.003, "loss": 4.1745, "step": 2808 }, { "epoch": 0.02809, "grad_norm": 0.6107598223212796, "learning_rate": 0.003, "loss": 4.2139, "step": 2809 }, { "epoch": 0.0281, "grad_norm": 0.5658938842173376, "learning_rate": 0.003, "loss": 4.1671, "step": 2810 }, { "epoch": 0.02811, "grad_norm": 0.5188959758106707, "learning_rate": 0.003, "loss": 4.1753, "step": 2811 }, { "epoch": 0.02812, "grad_norm": 0.47017604842162736, "learning_rate": 0.003, "loss": 4.19, "step": 2812 }, { "epoch": 0.02813, "grad_norm": 0.43817425390848724, "learning_rate": 0.003, "loss": 4.1784, "step": 2813 }, { "epoch": 0.02814, "grad_norm": 0.49665633422440225, "learning_rate": 0.003, "loss": 4.1789, "step": 2814 }, { "epoch": 0.02815, "grad_norm": 0.5737919498303556, "learning_rate": 0.003, "loss": 4.2179, "step": 2815 }, { "epoch": 0.02816, "grad_norm": 0.5958113314856148, "learning_rate": 0.003, "loss": 4.1978, "step": 2816 }, { "epoch": 0.02817, "grad_norm": 0.7178958177645314, "learning_rate": 0.003, "loss": 4.1563, "step": 2817 }, { "epoch": 0.02818, "grad_norm": 0.856700432217493, "learning_rate": 0.003, "loss": 4.1797, "step": 2818 }, { "epoch": 0.02819, "grad_norm": 0.8381122457158061, "learning_rate": 0.003, "loss": 4.2052, "step": 2819 }, { "epoch": 0.0282, "grad_norm": 0.9145463072400436, "learning_rate": 0.003, "loss": 4.2085, "step": 2820 }, { "epoch": 0.02821, "grad_norm": 0.8180716884994599, "learning_rate": 0.003, "loss": 4.2204, "step": 2821 }, { "epoch": 0.02822, "grad_norm": 0.749853073972982, "learning_rate": 0.003, "loss": 4.1962, "step": 2822 }, { "epoch": 0.02823, "grad_norm": 0.8042960763019706, "learning_rate": 0.003, "loss": 4.2228, "step": 2823 }, { "epoch": 0.02824, "grad_norm": 0.9373801354350708, "learning_rate": 0.003, "loss": 4.2156, "step": 2824 }, { "epoch": 0.02825, "grad_norm": 1.0578826545081004, "learning_rate": 0.003, "loss": 4.2331, "step": 2825 }, { "epoch": 0.02826, "grad_norm": 0.7996056170859099, "learning_rate": 0.003, "loss": 4.1963, "step": 2826 }, { "epoch": 0.02827, "grad_norm": 0.8051765401350005, "learning_rate": 0.003, "loss": 4.1832, "step": 2827 }, { "epoch": 0.02828, "grad_norm": 0.8027875497458861, "learning_rate": 0.003, "loss": 4.2252, "step": 2828 }, { "epoch": 0.02829, "grad_norm": 0.7733380033017664, "learning_rate": 0.003, "loss": 4.2487, "step": 2829 }, { "epoch": 0.0283, "grad_norm": 0.7838154488025357, "learning_rate": 0.003, "loss": 4.215, "step": 2830 }, { "epoch": 0.02831, "grad_norm": 0.7961302139800379, "learning_rate": 0.003, "loss": 4.2125, "step": 2831 }, { "epoch": 0.02832, "grad_norm": 0.8562794848672607, "learning_rate": 0.003, "loss": 4.2283, "step": 2832 }, { "epoch": 0.02833, "grad_norm": 0.9219375196224617, "learning_rate": 0.003, "loss": 4.2033, "step": 2833 }, { "epoch": 0.02834, "grad_norm": 1.066449364140732, "learning_rate": 0.003, "loss": 4.2018, "step": 2834 }, { "epoch": 0.02835, "grad_norm": 0.9222685550708334, "learning_rate": 0.003, "loss": 4.2111, "step": 2835 }, { "epoch": 0.02836, "grad_norm": 0.7684380143787258, "learning_rate": 0.003, "loss": 4.2263, "step": 2836 }, { "epoch": 0.02837, "grad_norm": 0.6557155817543934, "learning_rate": 0.003, "loss": 4.1966, "step": 2837 }, { "epoch": 0.02838, "grad_norm": 0.635865016407609, "learning_rate": 0.003, "loss": 4.1597, "step": 2838 }, { "epoch": 0.02839, "grad_norm": 0.6046255360999631, "learning_rate": 0.003, "loss": 4.159, "step": 2839 }, { "epoch": 0.0284, "grad_norm": 0.5605750331386489, "learning_rate": 0.003, "loss": 4.1687, "step": 2840 }, { "epoch": 0.02841, "grad_norm": 0.5616642948887489, "learning_rate": 0.003, "loss": 4.206, "step": 2841 }, { "epoch": 0.02842, "grad_norm": 0.57068171158362, "learning_rate": 0.003, "loss": 4.1872, "step": 2842 }, { "epoch": 0.02843, "grad_norm": 0.5405942011469851, "learning_rate": 0.003, "loss": 4.2077, "step": 2843 }, { "epoch": 0.02844, "grad_norm": 0.5611073031217337, "learning_rate": 0.003, "loss": 4.2038, "step": 2844 }, { "epoch": 0.02845, "grad_norm": 0.6289027848917312, "learning_rate": 0.003, "loss": 4.1835, "step": 2845 }, { "epoch": 0.02846, "grad_norm": 0.6920775632747969, "learning_rate": 0.003, "loss": 4.178, "step": 2846 }, { "epoch": 0.02847, "grad_norm": 0.767579826338002, "learning_rate": 0.003, "loss": 4.2364, "step": 2847 }, { "epoch": 0.02848, "grad_norm": 0.7249701206375546, "learning_rate": 0.003, "loss": 4.1955, "step": 2848 }, { "epoch": 0.02849, "grad_norm": 0.516455337675884, "learning_rate": 0.003, "loss": 4.173, "step": 2849 }, { "epoch": 0.0285, "grad_norm": 0.5059786145152202, "learning_rate": 0.003, "loss": 4.1762, "step": 2850 }, { "epoch": 0.02851, "grad_norm": 0.5163885986759773, "learning_rate": 0.003, "loss": 4.2134, "step": 2851 }, { "epoch": 0.02852, "grad_norm": 0.49258101706112184, "learning_rate": 0.003, "loss": 4.1891, "step": 2852 }, { "epoch": 0.02853, "grad_norm": 0.500381435832321, "learning_rate": 0.003, "loss": 4.1572, "step": 2853 }, { "epoch": 0.02854, "grad_norm": 0.4710049420804533, "learning_rate": 0.003, "loss": 4.1946, "step": 2854 }, { "epoch": 0.02855, "grad_norm": 0.4796815944787949, "learning_rate": 0.003, "loss": 4.1909, "step": 2855 }, { "epoch": 0.02856, "grad_norm": 0.5101632070884339, "learning_rate": 0.003, "loss": 4.1943, "step": 2856 }, { "epoch": 0.02857, "grad_norm": 0.4913306093184678, "learning_rate": 0.003, "loss": 4.1728, "step": 2857 }, { "epoch": 0.02858, "grad_norm": 0.4711311880423705, "learning_rate": 0.003, "loss": 4.1626, "step": 2858 }, { "epoch": 0.02859, "grad_norm": 0.5082622329894141, "learning_rate": 0.003, "loss": 4.1713, "step": 2859 }, { "epoch": 0.0286, "grad_norm": 0.5184604450005492, "learning_rate": 0.003, "loss": 4.1843, "step": 2860 }, { "epoch": 0.02861, "grad_norm": 0.48267289618449316, "learning_rate": 0.003, "loss": 4.1541, "step": 2861 }, { "epoch": 0.02862, "grad_norm": 0.4761246257076003, "learning_rate": 0.003, "loss": 4.1721, "step": 2862 }, { "epoch": 0.02863, "grad_norm": 0.555990003004704, "learning_rate": 0.003, "loss": 4.1996, "step": 2863 }, { "epoch": 0.02864, "grad_norm": 0.6398184414941633, "learning_rate": 0.003, "loss": 4.1725, "step": 2864 }, { "epoch": 0.02865, "grad_norm": 0.7105267215046387, "learning_rate": 0.003, "loss": 4.2076, "step": 2865 }, { "epoch": 0.02866, "grad_norm": 0.7313418455236842, "learning_rate": 0.003, "loss": 4.1911, "step": 2866 }, { "epoch": 0.02867, "grad_norm": 0.6641528598178345, "learning_rate": 0.003, "loss": 4.1753, "step": 2867 }, { "epoch": 0.02868, "grad_norm": 0.5930404639850378, "learning_rate": 0.003, "loss": 4.1816, "step": 2868 }, { "epoch": 0.02869, "grad_norm": 0.6719622014213373, "learning_rate": 0.003, "loss": 4.1744, "step": 2869 }, { "epoch": 0.0287, "grad_norm": 0.6550610400030609, "learning_rate": 0.003, "loss": 4.1971, "step": 2870 }, { "epoch": 0.02871, "grad_norm": 0.6686050845776436, "learning_rate": 0.003, "loss": 4.1824, "step": 2871 }, { "epoch": 0.02872, "grad_norm": 0.7301947656997149, "learning_rate": 0.003, "loss": 4.2172, "step": 2872 }, { "epoch": 0.02873, "grad_norm": 0.6807495486455808, "learning_rate": 0.003, "loss": 4.1867, "step": 2873 }, { "epoch": 0.02874, "grad_norm": 0.625144556477654, "learning_rate": 0.003, "loss": 4.1995, "step": 2874 }, { "epoch": 0.02875, "grad_norm": 0.6566904903408397, "learning_rate": 0.003, "loss": 4.2051, "step": 2875 }, { "epoch": 0.02876, "grad_norm": 0.759141268501963, "learning_rate": 0.003, "loss": 4.1921, "step": 2876 }, { "epoch": 0.02877, "grad_norm": 0.8807634127058958, "learning_rate": 0.003, "loss": 4.1842, "step": 2877 }, { "epoch": 0.02878, "grad_norm": 1.0261973428455773, "learning_rate": 0.003, "loss": 4.1965, "step": 2878 }, { "epoch": 0.02879, "grad_norm": 0.888483399285398, "learning_rate": 0.003, "loss": 4.1815, "step": 2879 }, { "epoch": 0.0288, "grad_norm": 0.8435453856609076, "learning_rate": 0.003, "loss": 4.1882, "step": 2880 }, { "epoch": 0.02881, "grad_norm": 0.8865486451512147, "learning_rate": 0.003, "loss": 4.204, "step": 2881 }, { "epoch": 0.02882, "grad_norm": 0.8928993496723746, "learning_rate": 0.003, "loss": 4.2242, "step": 2882 }, { "epoch": 0.02883, "grad_norm": 0.8226859467424321, "learning_rate": 0.003, "loss": 4.2032, "step": 2883 }, { "epoch": 0.02884, "grad_norm": 0.77429031635637, "learning_rate": 0.003, "loss": 4.2401, "step": 2884 }, { "epoch": 0.02885, "grad_norm": 0.8922281925752906, "learning_rate": 0.003, "loss": 4.1845, "step": 2885 }, { "epoch": 0.02886, "grad_norm": 0.9214758547731667, "learning_rate": 0.003, "loss": 4.2251, "step": 2886 }, { "epoch": 0.02887, "grad_norm": 0.7646945576422838, "learning_rate": 0.003, "loss": 4.205, "step": 2887 }, { "epoch": 0.02888, "grad_norm": 0.5765970376943947, "learning_rate": 0.003, "loss": 4.1923, "step": 2888 }, { "epoch": 0.02889, "grad_norm": 0.6149544262580506, "learning_rate": 0.003, "loss": 4.1823, "step": 2889 }, { "epoch": 0.0289, "grad_norm": 0.5856358962779576, "learning_rate": 0.003, "loss": 4.2099, "step": 2890 }, { "epoch": 0.02891, "grad_norm": 0.5989364430008874, "learning_rate": 0.003, "loss": 4.2274, "step": 2891 }, { "epoch": 0.02892, "grad_norm": 0.6055146901336932, "learning_rate": 0.003, "loss": 4.1785, "step": 2892 }, { "epoch": 0.02893, "grad_norm": 0.6302785437079876, "learning_rate": 0.003, "loss": 4.185, "step": 2893 }, { "epoch": 0.02894, "grad_norm": 0.5540105467551689, "learning_rate": 0.003, "loss": 4.1878, "step": 2894 }, { "epoch": 0.02895, "grad_norm": 0.4023612128343426, "learning_rate": 0.003, "loss": 4.2266, "step": 2895 }, { "epoch": 0.02896, "grad_norm": 0.46314453746126943, "learning_rate": 0.003, "loss": 4.1815, "step": 2896 }, { "epoch": 0.02897, "grad_norm": 0.5689975115372741, "learning_rate": 0.003, "loss": 4.1638, "step": 2897 }, { "epoch": 0.02898, "grad_norm": 0.5759099877999215, "learning_rate": 0.003, "loss": 4.2076, "step": 2898 }, { "epoch": 0.02899, "grad_norm": 0.5841372181240183, "learning_rate": 0.003, "loss": 4.1911, "step": 2899 }, { "epoch": 0.029, "grad_norm": 0.5742774291659906, "learning_rate": 0.003, "loss": 4.1791, "step": 2900 }, { "epoch": 0.02901, "grad_norm": 0.505503655503479, "learning_rate": 0.003, "loss": 4.1922, "step": 2901 }, { "epoch": 0.02902, "grad_norm": 0.47503490148304944, "learning_rate": 0.003, "loss": 4.2092, "step": 2902 }, { "epoch": 0.02903, "grad_norm": 0.5474247569951972, "learning_rate": 0.003, "loss": 4.1689, "step": 2903 }, { "epoch": 0.02904, "grad_norm": 0.6382457425375883, "learning_rate": 0.003, "loss": 4.1897, "step": 2904 }, { "epoch": 0.02905, "grad_norm": 0.707934254197877, "learning_rate": 0.003, "loss": 4.2073, "step": 2905 }, { "epoch": 0.02906, "grad_norm": 0.8403476091404251, "learning_rate": 0.003, "loss": 4.192, "step": 2906 }, { "epoch": 0.02907, "grad_norm": 0.8318613643365537, "learning_rate": 0.003, "loss": 4.1704, "step": 2907 }, { "epoch": 0.02908, "grad_norm": 0.7272031628626594, "learning_rate": 0.003, "loss": 4.1912, "step": 2908 }, { "epoch": 0.02909, "grad_norm": 0.669946483884344, "learning_rate": 0.003, "loss": 4.1853, "step": 2909 }, { "epoch": 0.0291, "grad_norm": 0.6105283375602145, "learning_rate": 0.003, "loss": 4.1946, "step": 2910 }, { "epoch": 0.02911, "grad_norm": 0.6965941358145008, "learning_rate": 0.003, "loss": 4.1939, "step": 2911 }, { "epoch": 0.02912, "grad_norm": 0.6975491884532862, "learning_rate": 0.003, "loss": 4.1654, "step": 2912 }, { "epoch": 0.02913, "grad_norm": 0.7857229509367818, "learning_rate": 0.003, "loss": 4.1992, "step": 2913 }, { "epoch": 0.02914, "grad_norm": 0.811281083177593, "learning_rate": 0.003, "loss": 4.1892, "step": 2914 }, { "epoch": 0.02915, "grad_norm": 0.6690033742013554, "learning_rate": 0.003, "loss": 4.1796, "step": 2915 }, { "epoch": 0.02916, "grad_norm": 0.5795750985593426, "learning_rate": 0.003, "loss": 4.1847, "step": 2916 }, { "epoch": 0.02917, "grad_norm": 0.6521162065161483, "learning_rate": 0.003, "loss": 4.2172, "step": 2917 }, { "epoch": 0.02918, "grad_norm": 0.6434845720798231, "learning_rate": 0.003, "loss": 4.1719, "step": 2918 }, { "epoch": 0.02919, "grad_norm": 0.6415437095317365, "learning_rate": 0.003, "loss": 4.2133, "step": 2919 }, { "epoch": 0.0292, "grad_norm": 0.6445863883052798, "learning_rate": 0.003, "loss": 4.1534, "step": 2920 }, { "epoch": 0.02921, "grad_norm": 0.5711168990862754, "learning_rate": 0.003, "loss": 4.1919, "step": 2921 }, { "epoch": 0.02922, "grad_norm": 0.6255743522411465, "learning_rate": 0.003, "loss": 4.1926, "step": 2922 }, { "epoch": 0.02923, "grad_norm": 0.7756468163112304, "learning_rate": 0.003, "loss": 4.1675, "step": 2923 }, { "epoch": 0.02924, "grad_norm": 0.8580521016504452, "learning_rate": 0.003, "loss": 4.2067, "step": 2924 }, { "epoch": 0.02925, "grad_norm": 0.8062805285298449, "learning_rate": 0.003, "loss": 4.2084, "step": 2925 }, { "epoch": 0.02926, "grad_norm": 0.6123939436837456, "learning_rate": 0.003, "loss": 4.1817, "step": 2926 }, { "epoch": 0.02927, "grad_norm": 0.5880261599317353, "learning_rate": 0.003, "loss": 4.1862, "step": 2927 }, { "epoch": 0.02928, "grad_norm": 0.6052041112444663, "learning_rate": 0.003, "loss": 4.2074, "step": 2928 }, { "epoch": 0.02929, "grad_norm": 0.6235842112912531, "learning_rate": 0.003, "loss": 4.192, "step": 2929 }, { "epoch": 0.0293, "grad_norm": 0.660214912657757, "learning_rate": 0.003, "loss": 4.1869, "step": 2930 }, { "epoch": 0.02931, "grad_norm": 0.5466629516534421, "learning_rate": 0.003, "loss": 4.1832, "step": 2931 }, { "epoch": 0.02932, "grad_norm": 0.5338608341273119, "learning_rate": 0.003, "loss": 4.1686, "step": 2932 }, { "epoch": 0.02933, "grad_norm": 0.4969939045829935, "learning_rate": 0.003, "loss": 4.1951, "step": 2933 }, { "epoch": 0.02934, "grad_norm": 0.49477597477846896, "learning_rate": 0.003, "loss": 4.1818, "step": 2934 }, { "epoch": 0.02935, "grad_norm": 0.4464607167642475, "learning_rate": 0.003, "loss": 4.1672, "step": 2935 }, { "epoch": 0.02936, "grad_norm": 0.49350443190661963, "learning_rate": 0.003, "loss": 4.1877, "step": 2936 }, { "epoch": 0.02937, "grad_norm": 0.6198926034158319, "learning_rate": 0.003, "loss": 4.1699, "step": 2937 }, { "epoch": 0.02938, "grad_norm": 0.7272182520738907, "learning_rate": 0.003, "loss": 4.1923, "step": 2938 }, { "epoch": 0.02939, "grad_norm": 0.8486086179503688, "learning_rate": 0.003, "loss": 4.1848, "step": 2939 }, { "epoch": 0.0294, "grad_norm": 0.8446986466812662, "learning_rate": 0.003, "loss": 4.2102, "step": 2940 }, { "epoch": 0.02941, "grad_norm": 0.7723043017297002, "learning_rate": 0.003, "loss": 4.208, "step": 2941 }, { "epoch": 0.02942, "grad_norm": 0.7697250754745237, "learning_rate": 0.003, "loss": 4.197, "step": 2942 }, { "epoch": 0.02943, "grad_norm": 0.741727859654241, "learning_rate": 0.003, "loss": 4.181, "step": 2943 }, { "epoch": 0.02944, "grad_norm": 0.691917422319892, "learning_rate": 0.003, "loss": 4.2047, "step": 2944 }, { "epoch": 0.02945, "grad_norm": 0.6550566422880618, "learning_rate": 0.003, "loss": 4.1868, "step": 2945 }, { "epoch": 0.02946, "grad_norm": 0.7775286126916453, "learning_rate": 0.003, "loss": 4.1841, "step": 2946 }, { "epoch": 0.02947, "grad_norm": 0.989103701566936, "learning_rate": 0.003, "loss": 4.2271, "step": 2947 }, { "epoch": 0.02948, "grad_norm": 1.1031413486335138, "learning_rate": 0.003, "loss": 4.2196, "step": 2948 }, { "epoch": 0.02949, "grad_norm": 0.9071172160919259, "learning_rate": 0.003, "loss": 4.1919, "step": 2949 }, { "epoch": 0.0295, "grad_norm": 0.9640749695276918, "learning_rate": 0.003, "loss": 4.2161, "step": 2950 }, { "epoch": 0.02951, "grad_norm": 1.0552794520418902, "learning_rate": 0.003, "loss": 4.1768, "step": 2951 }, { "epoch": 0.02952, "grad_norm": 1.0245244401252407, "learning_rate": 0.003, "loss": 4.2154, "step": 2952 }, { "epoch": 0.02953, "grad_norm": 1.0404102150119146, "learning_rate": 0.003, "loss": 4.2329, "step": 2953 }, { "epoch": 0.02954, "grad_norm": 1.200181394447266, "learning_rate": 0.003, "loss": 4.2778, "step": 2954 }, { "epoch": 0.02955, "grad_norm": 0.8161704211969852, "learning_rate": 0.003, "loss": 4.2394, "step": 2955 }, { "epoch": 0.02956, "grad_norm": 0.8295432503492015, "learning_rate": 0.003, "loss": 4.2255, "step": 2956 }, { "epoch": 0.02957, "grad_norm": 0.8024603706062242, "learning_rate": 0.003, "loss": 4.2343, "step": 2957 }, { "epoch": 0.02958, "grad_norm": 0.9622635969093861, "learning_rate": 0.003, "loss": 4.2527, "step": 2958 }, { "epoch": 0.02959, "grad_norm": 1.144055914507281, "learning_rate": 0.003, "loss": 4.2509, "step": 2959 }, { "epoch": 0.0296, "grad_norm": 0.8230875124438284, "learning_rate": 0.003, "loss": 4.2477, "step": 2960 }, { "epoch": 0.02961, "grad_norm": 0.7761645102479322, "learning_rate": 0.003, "loss": 4.2177, "step": 2961 }, { "epoch": 0.02962, "grad_norm": 0.5798322996384551, "learning_rate": 0.003, "loss": 4.2146, "step": 2962 }, { "epoch": 0.02963, "grad_norm": 0.6082007449676078, "learning_rate": 0.003, "loss": 4.1885, "step": 2963 }, { "epoch": 0.02964, "grad_norm": 0.5952539583915469, "learning_rate": 0.003, "loss": 4.1722, "step": 2964 }, { "epoch": 0.02965, "grad_norm": 0.6067339945313683, "learning_rate": 0.003, "loss": 4.2192, "step": 2965 }, { "epoch": 0.02966, "grad_norm": 0.6300423766569044, "learning_rate": 0.003, "loss": 4.2275, "step": 2966 }, { "epoch": 0.02967, "grad_norm": 0.6210602868633931, "learning_rate": 0.003, "loss": 4.2121, "step": 2967 }, { "epoch": 0.02968, "grad_norm": 0.5447153342269296, "learning_rate": 0.003, "loss": 4.208, "step": 2968 }, { "epoch": 0.02969, "grad_norm": 0.5492051288557623, "learning_rate": 0.003, "loss": 4.2026, "step": 2969 }, { "epoch": 0.0297, "grad_norm": 0.5667658969213387, "learning_rate": 0.003, "loss": 4.1857, "step": 2970 }, { "epoch": 0.02971, "grad_norm": 0.5386297699076645, "learning_rate": 0.003, "loss": 4.2025, "step": 2971 }, { "epoch": 0.02972, "grad_norm": 0.4946208196601275, "learning_rate": 0.003, "loss": 4.1702, "step": 2972 }, { "epoch": 0.02973, "grad_norm": 0.5841767565220707, "learning_rate": 0.003, "loss": 4.1584, "step": 2973 }, { "epoch": 0.02974, "grad_norm": 0.7076979032412727, "learning_rate": 0.003, "loss": 4.1953, "step": 2974 }, { "epoch": 0.02975, "grad_norm": 0.9835377294492083, "learning_rate": 0.003, "loss": 4.1927, "step": 2975 }, { "epoch": 0.02976, "grad_norm": 1.1071095377980806, "learning_rate": 0.003, "loss": 4.2143, "step": 2976 }, { "epoch": 0.02977, "grad_norm": 0.7259690969712574, "learning_rate": 0.003, "loss": 4.1859, "step": 2977 }, { "epoch": 0.02978, "grad_norm": 0.7734023943128602, "learning_rate": 0.003, "loss": 4.2094, "step": 2978 }, { "epoch": 0.02979, "grad_norm": 0.7278346414839765, "learning_rate": 0.003, "loss": 4.1886, "step": 2979 }, { "epoch": 0.0298, "grad_norm": 0.660497169411274, "learning_rate": 0.003, "loss": 4.1931, "step": 2980 }, { "epoch": 0.02981, "grad_norm": 0.6667577508097533, "learning_rate": 0.003, "loss": 4.2009, "step": 2981 }, { "epoch": 0.02982, "grad_norm": 0.6386554072904321, "learning_rate": 0.003, "loss": 4.1958, "step": 2982 }, { "epoch": 0.02983, "grad_norm": 0.5418480426752083, "learning_rate": 0.003, "loss": 4.2068, "step": 2983 }, { "epoch": 0.02984, "grad_norm": 0.5015711298760291, "learning_rate": 0.003, "loss": 4.1815, "step": 2984 }, { "epoch": 0.02985, "grad_norm": 0.47335120613727616, "learning_rate": 0.003, "loss": 4.2107, "step": 2985 }, { "epoch": 0.02986, "grad_norm": 0.45169284672546406, "learning_rate": 0.003, "loss": 4.1679, "step": 2986 }, { "epoch": 0.02987, "grad_norm": 0.3872178111815347, "learning_rate": 0.003, "loss": 4.1603, "step": 2987 }, { "epoch": 0.02988, "grad_norm": 0.5156766146850885, "learning_rate": 0.003, "loss": 4.176, "step": 2988 }, { "epoch": 0.02989, "grad_norm": 0.5734795816734266, "learning_rate": 0.003, "loss": 4.1903, "step": 2989 }, { "epoch": 0.0299, "grad_norm": 0.6108581797072585, "learning_rate": 0.003, "loss": 4.1491, "step": 2990 }, { "epoch": 0.02991, "grad_norm": 0.6465308189507616, "learning_rate": 0.003, "loss": 4.2032, "step": 2991 }, { "epoch": 0.02992, "grad_norm": 0.6024426991759066, "learning_rate": 0.003, "loss": 4.1963, "step": 2992 }, { "epoch": 0.02993, "grad_norm": 0.5450535416942648, "learning_rate": 0.003, "loss": 4.1677, "step": 2993 }, { "epoch": 0.02994, "grad_norm": 0.5271060232078253, "learning_rate": 0.003, "loss": 4.1764, "step": 2994 }, { "epoch": 0.02995, "grad_norm": 0.45562729812821884, "learning_rate": 0.003, "loss": 4.2083, "step": 2995 }, { "epoch": 0.02996, "grad_norm": 0.460767745641914, "learning_rate": 0.003, "loss": 4.1977, "step": 2996 }, { "epoch": 0.02997, "grad_norm": 0.49883686701327856, "learning_rate": 0.003, "loss": 4.1594, "step": 2997 }, { "epoch": 0.02998, "grad_norm": 0.5495557644619213, "learning_rate": 0.003, "loss": 4.1591, "step": 2998 }, { "epoch": 0.02999, "grad_norm": 0.6523752894410377, "learning_rate": 0.003, "loss": 4.1565, "step": 2999 }, { "epoch": 0.03, "grad_norm": 0.7129180858865645, "learning_rate": 0.003, "loss": 4.2061, "step": 3000 }, { "epoch": 0.03001, "grad_norm": 0.7274677458257305, "learning_rate": 0.003, "loss": 4.1741, "step": 3001 }, { "epoch": 0.03002, "grad_norm": 0.6467948184705592, "learning_rate": 0.003, "loss": 4.1853, "step": 3002 }, { "epoch": 0.03003, "grad_norm": 0.6600036116283045, "learning_rate": 0.003, "loss": 4.1654, "step": 3003 }, { "epoch": 0.03004, "grad_norm": 0.7176122662961554, "learning_rate": 0.003, "loss": 4.1486, "step": 3004 }, { "epoch": 0.03005, "grad_norm": 0.6686551022549297, "learning_rate": 0.003, "loss": 4.1965, "step": 3005 }, { "epoch": 0.03006, "grad_norm": 0.7091833087612831, "learning_rate": 0.003, "loss": 4.1949, "step": 3006 }, { "epoch": 0.03007, "grad_norm": 0.6742803685976605, "learning_rate": 0.003, "loss": 4.1664, "step": 3007 }, { "epoch": 0.03008, "grad_norm": 0.6530504414551399, "learning_rate": 0.003, "loss": 4.1926, "step": 3008 }, { "epoch": 0.03009, "grad_norm": 0.7147134856842983, "learning_rate": 0.003, "loss": 4.1681, "step": 3009 }, { "epoch": 0.0301, "grad_norm": 0.803723268068343, "learning_rate": 0.003, "loss": 4.1972, "step": 3010 }, { "epoch": 0.03011, "grad_norm": 0.8381383897634889, "learning_rate": 0.003, "loss": 4.1805, "step": 3011 }, { "epoch": 0.03012, "grad_norm": 0.6693037897815421, "learning_rate": 0.003, "loss": 4.1812, "step": 3012 }, { "epoch": 0.03013, "grad_norm": 0.5695069096544523, "learning_rate": 0.003, "loss": 4.14, "step": 3013 }, { "epoch": 0.03014, "grad_norm": 0.5914912051432484, "learning_rate": 0.003, "loss": 4.1697, "step": 3014 }, { "epoch": 0.03015, "grad_norm": 0.6342676904398465, "learning_rate": 0.003, "loss": 4.1663, "step": 3015 }, { "epoch": 0.03016, "grad_norm": 0.643666546817278, "learning_rate": 0.003, "loss": 4.1758, "step": 3016 }, { "epoch": 0.03017, "grad_norm": 0.6248622988385808, "learning_rate": 0.003, "loss": 4.1711, "step": 3017 }, { "epoch": 0.03018, "grad_norm": 0.5659743586845699, "learning_rate": 0.003, "loss": 4.1652, "step": 3018 }, { "epoch": 0.03019, "grad_norm": 0.543953415462997, "learning_rate": 0.003, "loss": 4.1825, "step": 3019 }, { "epoch": 0.0302, "grad_norm": 0.4979549215791465, "learning_rate": 0.003, "loss": 4.171, "step": 3020 }, { "epoch": 0.03021, "grad_norm": 0.5988839209053217, "learning_rate": 0.003, "loss": 4.1986, "step": 3021 }, { "epoch": 0.03022, "grad_norm": 0.6917001050231241, "learning_rate": 0.003, "loss": 4.1995, "step": 3022 }, { "epoch": 0.03023, "grad_norm": 0.767246802866098, "learning_rate": 0.003, "loss": 4.215, "step": 3023 }, { "epoch": 0.03024, "grad_norm": 0.7171193879296623, "learning_rate": 0.003, "loss": 4.2081, "step": 3024 }, { "epoch": 0.03025, "grad_norm": 0.8041231345280121, "learning_rate": 0.003, "loss": 4.1898, "step": 3025 }, { "epoch": 0.03026, "grad_norm": 0.8792691727950304, "learning_rate": 0.003, "loss": 4.169, "step": 3026 }, { "epoch": 0.03027, "grad_norm": 1.0175395228935675, "learning_rate": 0.003, "loss": 4.1955, "step": 3027 }, { "epoch": 0.03028, "grad_norm": 0.923391249014174, "learning_rate": 0.003, "loss": 4.1605, "step": 3028 }, { "epoch": 0.03029, "grad_norm": 0.7571617694978044, "learning_rate": 0.003, "loss": 4.1972, "step": 3029 }, { "epoch": 0.0303, "grad_norm": 0.8910919036216922, "learning_rate": 0.003, "loss": 4.1858, "step": 3030 }, { "epoch": 0.03031, "grad_norm": 0.9641578814377423, "learning_rate": 0.003, "loss": 4.1735, "step": 3031 }, { "epoch": 0.03032, "grad_norm": 0.9409295179057755, "learning_rate": 0.003, "loss": 4.2025, "step": 3032 }, { "epoch": 0.03033, "grad_norm": 0.8317283746521951, "learning_rate": 0.003, "loss": 4.1969, "step": 3033 }, { "epoch": 0.03034, "grad_norm": 0.7313668328730979, "learning_rate": 0.003, "loss": 4.1976, "step": 3034 }, { "epoch": 0.03035, "grad_norm": 0.7132789456809936, "learning_rate": 0.003, "loss": 4.2248, "step": 3035 }, { "epoch": 0.03036, "grad_norm": 0.674714029441459, "learning_rate": 0.003, "loss": 4.1962, "step": 3036 }, { "epoch": 0.03037, "grad_norm": 0.7009049198448731, "learning_rate": 0.003, "loss": 4.1813, "step": 3037 }, { "epoch": 0.03038, "grad_norm": 0.6254153988955986, "learning_rate": 0.003, "loss": 4.1778, "step": 3038 }, { "epoch": 0.03039, "grad_norm": 0.6327013070531727, "learning_rate": 0.003, "loss": 4.1827, "step": 3039 }, { "epoch": 0.0304, "grad_norm": 0.6070726773833675, "learning_rate": 0.003, "loss": 4.1868, "step": 3040 }, { "epoch": 0.03041, "grad_norm": 0.5945854551887333, "learning_rate": 0.003, "loss": 4.1928, "step": 3041 }, { "epoch": 0.03042, "grad_norm": 0.5653702360336539, "learning_rate": 0.003, "loss": 4.2138, "step": 3042 }, { "epoch": 0.03043, "grad_norm": 0.48339928862065246, "learning_rate": 0.003, "loss": 4.1665, "step": 3043 }, { "epoch": 0.03044, "grad_norm": 0.4882765542102775, "learning_rate": 0.003, "loss": 4.1658, "step": 3044 }, { "epoch": 0.03045, "grad_norm": 0.49906211495716746, "learning_rate": 0.003, "loss": 4.1682, "step": 3045 }, { "epoch": 0.03046, "grad_norm": 0.6073778663358335, "learning_rate": 0.003, "loss": 4.1808, "step": 3046 }, { "epoch": 0.03047, "grad_norm": 0.767480218845333, "learning_rate": 0.003, "loss": 4.1898, "step": 3047 }, { "epoch": 0.03048, "grad_norm": 0.8141142563495186, "learning_rate": 0.003, "loss": 4.1741, "step": 3048 }, { "epoch": 0.03049, "grad_norm": 0.882596084462819, "learning_rate": 0.003, "loss": 4.164, "step": 3049 }, { "epoch": 0.0305, "grad_norm": 0.872061418696622, "learning_rate": 0.003, "loss": 4.1847, "step": 3050 }, { "epoch": 0.03051, "grad_norm": 0.8096508372990807, "learning_rate": 0.003, "loss": 4.1923, "step": 3051 }, { "epoch": 0.03052, "grad_norm": 0.7731212144212846, "learning_rate": 0.003, "loss": 4.1995, "step": 3052 }, { "epoch": 0.03053, "grad_norm": 0.8121040926587938, "learning_rate": 0.003, "loss": 4.2108, "step": 3053 }, { "epoch": 0.03054, "grad_norm": 0.8031490148625648, "learning_rate": 0.003, "loss": 4.19, "step": 3054 }, { "epoch": 0.03055, "grad_norm": 0.7123837580650032, "learning_rate": 0.003, "loss": 4.1921, "step": 3055 }, { "epoch": 0.03056, "grad_norm": 0.6851652643314644, "learning_rate": 0.003, "loss": 4.2013, "step": 3056 }, { "epoch": 0.03057, "grad_norm": 0.7381209914846429, "learning_rate": 0.003, "loss": 4.205, "step": 3057 }, { "epoch": 0.03058, "grad_norm": 0.8318560657251862, "learning_rate": 0.003, "loss": 4.1792, "step": 3058 }, { "epoch": 0.03059, "grad_norm": 0.8791895081718305, "learning_rate": 0.003, "loss": 4.1917, "step": 3059 }, { "epoch": 0.0306, "grad_norm": 0.814947872922507, "learning_rate": 0.003, "loss": 4.1796, "step": 3060 }, { "epoch": 0.03061, "grad_norm": 0.6938901592564853, "learning_rate": 0.003, "loss": 4.1987, "step": 3061 }, { "epoch": 0.03062, "grad_norm": 0.7477036973476255, "learning_rate": 0.003, "loss": 4.225, "step": 3062 }, { "epoch": 0.03063, "grad_norm": 0.6602150387540897, "learning_rate": 0.003, "loss": 4.1652, "step": 3063 }, { "epoch": 0.03064, "grad_norm": 0.588757114074522, "learning_rate": 0.003, "loss": 4.1973, "step": 3064 }, { "epoch": 0.03065, "grad_norm": 0.5129768984617096, "learning_rate": 0.003, "loss": 4.1599, "step": 3065 }, { "epoch": 0.03066, "grad_norm": 0.5610278353764739, "learning_rate": 0.003, "loss": 4.2055, "step": 3066 }, { "epoch": 0.03067, "grad_norm": 0.5604919558505147, "learning_rate": 0.003, "loss": 4.209, "step": 3067 }, { "epoch": 0.03068, "grad_norm": 0.6135705525861784, "learning_rate": 0.003, "loss": 4.1804, "step": 3068 }, { "epoch": 0.03069, "grad_norm": 0.6668599988517637, "learning_rate": 0.003, "loss": 4.1668, "step": 3069 }, { "epoch": 0.0307, "grad_norm": 0.7555455276952089, "learning_rate": 0.003, "loss": 4.1799, "step": 3070 }, { "epoch": 0.03071, "grad_norm": 0.6246930367493289, "learning_rate": 0.003, "loss": 4.1743, "step": 3071 }, { "epoch": 0.03072, "grad_norm": 0.5918561396926248, "learning_rate": 0.003, "loss": 4.1774, "step": 3072 }, { "epoch": 0.03073, "grad_norm": 0.543534573866796, "learning_rate": 0.003, "loss": 4.1741, "step": 3073 }, { "epoch": 0.03074, "grad_norm": 0.5906611145957643, "learning_rate": 0.003, "loss": 4.1755, "step": 3074 }, { "epoch": 0.03075, "grad_norm": 0.6400240036041446, "learning_rate": 0.003, "loss": 4.2013, "step": 3075 }, { "epoch": 0.03076, "grad_norm": 0.5704867962206898, "learning_rate": 0.003, "loss": 4.1943, "step": 3076 }, { "epoch": 0.03077, "grad_norm": 0.5207074704544816, "learning_rate": 0.003, "loss": 4.1858, "step": 3077 }, { "epoch": 0.03078, "grad_norm": 0.49735495203144253, "learning_rate": 0.003, "loss": 4.1718, "step": 3078 }, { "epoch": 0.03079, "grad_norm": 0.49371618460089295, "learning_rate": 0.003, "loss": 4.159, "step": 3079 }, { "epoch": 0.0308, "grad_norm": 0.5286026033951621, "learning_rate": 0.003, "loss": 4.1869, "step": 3080 }, { "epoch": 0.03081, "grad_norm": 0.597818150474852, "learning_rate": 0.003, "loss": 4.1589, "step": 3081 }, { "epoch": 0.03082, "grad_norm": 0.673672308591032, "learning_rate": 0.003, "loss": 4.1645, "step": 3082 }, { "epoch": 0.03083, "grad_norm": 0.742812911307247, "learning_rate": 0.003, "loss": 4.1694, "step": 3083 }, { "epoch": 0.03084, "grad_norm": 0.7219826879276637, "learning_rate": 0.003, "loss": 4.1823, "step": 3084 }, { "epoch": 0.03085, "grad_norm": 0.6340360256960204, "learning_rate": 0.003, "loss": 4.1754, "step": 3085 }, { "epoch": 0.03086, "grad_norm": 0.5692756028815859, "learning_rate": 0.003, "loss": 4.1965, "step": 3086 }, { "epoch": 0.03087, "grad_norm": 0.6389893757240679, "learning_rate": 0.003, "loss": 4.1728, "step": 3087 }, { "epoch": 0.03088, "grad_norm": 0.6896729441240512, "learning_rate": 0.003, "loss": 4.1909, "step": 3088 }, { "epoch": 0.03089, "grad_norm": 0.8131458592240662, "learning_rate": 0.003, "loss": 4.1601, "step": 3089 }, { "epoch": 0.0309, "grad_norm": 0.8512834574988407, "learning_rate": 0.003, "loss": 4.1699, "step": 3090 }, { "epoch": 0.03091, "grad_norm": 0.8422191636508738, "learning_rate": 0.003, "loss": 4.1335, "step": 3091 }, { "epoch": 0.03092, "grad_norm": 0.7541794006345667, "learning_rate": 0.003, "loss": 4.1613, "step": 3092 }, { "epoch": 0.03093, "grad_norm": 0.6725158454989346, "learning_rate": 0.003, "loss": 4.1914, "step": 3093 }, { "epoch": 0.03094, "grad_norm": 0.7201284858059102, "learning_rate": 0.003, "loss": 4.1757, "step": 3094 }, { "epoch": 0.03095, "grad_norm": 0.7621210210503538, "learning_rate": 0.003, "loss": 4.1919, "step": 3095 }, { "epoch": 0.03096, "grad_norm": 0.6578929196198599, "learning_rate": 0.003, "loss": 4.1684, "step": 3096 }, { "epoch": 0.03097, "grad_norm": 0.695067390761494, "learning_rate": 0.003, "loss": 4.1922, "step": 3097 }, { "epoch": 0.03098, "grad_norm": 0.740033596710499, "learning_rate": 0.003, "loss": 4.1912, "step": 3098 }, { "epoch": 0.03099, "grad_norm": 0.7403836940802626, "learning_rate": 0.003, "loss": 4.185, "step": 3099 }, { "epoch": 0.031, "grad_norm": 0.6979751976411313, "learning_rate": 0.003, "loss": 4.179, "step": 3100 }, { "epoch": 0.03101, "grad_norm": 0.7670376808263826, "learning_rate": 0.003, "loss": 4.1742, "step": 3101 }, { "epoch": 0.03102, "grad_norm": 0.8040700995473709, "learning_rate": 0.003, "loss": 4.1768, "step": 3102 }, { "epoch": 0.03103, "grad_norm": 0.7605055478614147, "learning_rate": 0.003, "loss": 4.1649, "step": 3103 }, { "epoch": 0.03104, "grad_norm": 0.7299989579866449, "learning_rate": 0.003, "loss": 4.1722, "step": 3104 }, { "epoch": 0.03105, "grad_norm": 0.8293979417721875, "learning_rate": 0.003, "loss": 4.1542, "step": 3105 }, { "epoch": 0.03106, "grad_norm": 0.7907107069059506, "learning_rate": 0.003, "loss": 4.1755, "step": 3106 }, { "epoch": 0.03107, "grad_norm": 0.7368559192868307, "learning_rate": 0.003, "loss": 4.1551, "step": 3107 }, { "epoch": 0.03108, "grad_norm": 0.6702525929923716, "learning_rate": 0.003, "loss": 4.1641, "step": 3108 }, { "epoch": 0.03109, "grad_norm": 0.7199266096748919, "learning_rate": 0.003, "loss": 4.1721, "step": 3109 }, { "epoch": 0.0311, "grad_norm": 0.7526329246324333, "learning_rate": 0.003, "loss": 4.1926, "step": 3110 }, { "epoch": 0.03111, "grad_norm": 0.7398905284889996, "learning_rate": 0.003, "loss": 4.173, "step": 3111 }, { "epoch": 0.03112, "grad_norm": 0.8085890464082365, "learning_rate": 0.003, "loss": 4.1956, "step": 3112 }, { "epoch": 0.03113, "grad_norm": 0.8975031706869222, "learning_rate": 0.003, "loss": 4.1852, "step": 3113 }, { "epoch": 0.03114, "grad_norm": 0.9143315414333046, "learning_rate": 0.003, "loss": 4.1926, "step": 3114 }, { "epoch": 0.03115, "grad_norm": 0.7984002320222364, "learning_rate": 0.003, "loss": 4.1932, "step": 3115 }, { "epoch": 0.03116, "grad_norm": 0.7251259112559764, "learning_rate": 0.003, "loss": 4.1766, "step": 3116 }, { "epoch": 0.03117, "grad_norm": 0.6765418867772506, "learning_rate": 0.003, "loss": 4.1803, "step": 3117 }, { "epoch": 0.03118, "grad_norm": 0.6564914593167945, "learning_rate": 0.003, "loss": 4.1805, "step": 3118 }, { "epoch": 0.03119, "grad_norm": 0.6469214981930856, "learning_rate": 0.003, "loss": 4.1951, "step": 3119 }, { "epoch": 0.0312, "grad_norm": 0.6004553747682227, "learning_rate": 0.003, "loss": 4.1991, "step": 3120 }, { "epoch": 0.03121, "grad_norm": 0.5447439216302157, "learning_rate": 0.003, "loss": 4.1713, "step": 3121 }, { "epoch": 0.03122, "grad_norm": 0.472628289298214, "learning_rate": 0.003, "loss": 4.1883, "step": 3122 }, { "epoch": 0.03123, "grad_norm": 0.4637134521023551, "learning_rate": 0.003, "loss": 4.1843, "step": 3123 }, { "epoch": 0.03124, "grad_norm": 0.5031937627853148, "learning_rate": 0.003, "loss": 4.1606, "step": 3124 }, { "epoch": 0.03125, "grad_norm": 0.49290767185737955, "learning_rate": 0.003, "loss": 4.1619, "step": 3125 }, { "epoch": 0.03126, "grad_norm": 0.5046421633962421, "learning_rate": 0.003, "loss": 4.166, "step": 3126 }, { "epoch": 0.03127, "grad_norm": 0.5295145475165267, "learning_rate": 0.003, "loss": 4.1725, "step": 3127 }, { "epoch": 0.03128, "grad_norm": 0.5768647951144659, "learning_rate": 0.003, "loss": 4.1631, "step": 3128 }, { "epoch": 0.03129, "grad_norm": 0.5526301834618427, "learning_rate": 0.003, "loss": 4.1801, "step": 3129 }, { "epoch": 0.0313, "grad_norm": 0.6096664293295351, "learning_rate": 0.003, "loss": 4.187, "step": 3130 }, { "epoch": 0.03131, "grad_norm": 0.6262440538187416, "learning_rate": 0.003, "loss": 4.1696, "step": 3131 }, { "epoch": 0.03132, "grad_norm": 0.6399356853738972, "learning_rate": 0.003, "loss": 4.1889, "step": 3132 }, { "epoch": 0.03133, "grad_norm": 0.6986536472969243, "learning_rate": 0.003, "loss": 4.182, "step": 3133 }, { "epoch": 0.03134, "grad_norm": 0.8533836264241755, "learning_rate": 0.003, "loss": 4.1841, "step": 3134 }, { "epoch": 0.03135, "grad_norm": 0.9857590467203682, "learning_rate": 0.003, "loss": 4.1591, "step": 3135 }, { "epoch": 0.03136, "grad_norm": 0.985916813895558, "learning_rate": 0.003, "loss": 4.1745, "step": 3136 }, { "epoch": 0.03137, "grad_norm": 0.7996019468603507, "learning_rate": 0.003, "loss": 4.177, "step": 3137 }, { "epoch": 0.03138, "grad_norm": 0.7409168937664911, "learning_rate": 0.003, "loss": 4.1757, "step": 3138 }, { "epoch": 0.03139, "grad_norm": 0.6767699843822652, "learning_rate": 0.003, "loss": 4.1828, "step": 3139 }, { "epoch": 0.0314, "grad_norm": 0.7304920419704491, "learning_rate": 0.003, "loss": 4.1836, "step": 3140 }, { "epoch": 0.03141, "grad_norm": 0.8085325981037152, "learning_rate": 0.003, "loss": 4.2179, "step": 3141 }, { "epoch": 0.03142, "grad_norm": 0.7280300882309108, "learning_rate": 0.003, "loss": 4.1945, "step": 3142 }, { "epoch": 0.03143, "grad_norm": 0.723146974389316, "learning_rate": 0.003, "loss": 4.1826, "step": 3143 }, { "epoch": 0.03144, "grad_norm": 0.6065206122686737, "learning_rate": 0.003, "loss": 4.1484, "step": 3144 }, { "epoch": 0.03145, "grad_norm": 0.644934935501266, "learning_rate": 0.003, "loss": 4.1764, "step": 3145 }, { "epoch": 0.03146, "grad_norm": 0.6592280348022013, "learning_rate": 0.003, "loss": 4.1869, "step": 3146 }, { "epoch": 0.03147, "grad_norm": 0.6530269899474325, "learning_rate": 0.003, "loss": 4.1855, "step": 3147 }, { "epoch": 0.03148, "grad_norm": 0.5745843025691852, "learning_rate": 0.003, "loss": 4.1852, "step": 3148 }, { "epoch": 0.03149, "grad_norm": 0.6065668720311537, "learning_rate": 0.003, "loss": 4.1911, "step": 3149 }, { "epoch": 0.0315, "grad_norm": 0.6922007846823143, "learning_rate": 0.003, "loss": 4.1787, "step": 3150 }, { "epoch": 0.03151, "grad_norm": 0.7094328965622133, "learning_rate": 0.003, "loss": 4.1911, "step": 3151 }, { "epoch": 0.03152, "grad_norm": 0.6960957361399359, "learning_rate": 0.003, "loss": 4.1913, "step": 3152 }, { "epoch": 0.03153, "grad_norm": 0.7486079877329721, "learning_rate": 0.003, "loss": 4.1997, "step": 3153 }, { "epoch": 0.03154, "grad_norm": 0.7456103075414062, "learning_rate": 0.003, "loss": 4.1959, "step": 3154 }, { "epoch": 0.03155, "grad_norm": 0.8177011172560863, "learning_rate": 0.003, "loss": 4.1844, "step": 3155 }, { "epoch": 0.03156, "grad_norm": 0.8793941608963775, "learning_rate": 0.003, "loss": 4.1749, "step": 3156 }, { "epoch": 0.03157, "grad_norm": 0.704857810319287, "learning_rate": 0.003, "loss": 4.1956, "step": 3157 }, { "epoch": 0.03158, "grad_norm": 0.6113967004695787, "learning_rate": 0.003, "loss": 4.1799, "step": 3158 }, { "epoch": 0.03159, "grad_norm": 0.66211174258146, "learning_rate": 0.003, "loss": 4.1801, "step": 3159 }, { "epoch": 0.0316, "grad_norm": 0.5523265028957371, "learning_rate": 0.003, "loss": 4.1916, "step": 3160 }, { "epoch": 0.03161, "grad_norm": 0.608325644395274, "learning_rate": 0.003, "loss": 4.1998, "step": 3161 }, { "epoch": 0.03162, "grad_norm": 0.6269366928420552, "learning_rate": 0.003, "loss": 4.2023, "step": 3162 }, { "epoch": 0.03163, "grad_norm": 0.6819002848904914, "learning_rate": 0.003, "loss": 4.188, "step": 3163 }, { "epoch": 0.03164, "grad_norm": 0.7746466933382178, "learning_rate": 0.003, "loss": 4.1862, "step": 3164 }, { "epoch": 0.03165, "grad_norm": 0.8342216139344243, "learning_rate": 0.003, "loss": 4.1658, "step": 3165 }, { "epoch": 0.03166, "grad_norm": 0.854621355905751, "learning_rate": 0.003, "loss": 4.1812, "step": 3166 }, { "epoch": 0.03167, "grad_norm": 0.9512159018248347, "learning_rate": 0.003, "loss": 4.1756, "step": 3167 }, { "epoch": 0.03168, "grad_norm": 0.9746218610383118, "learning_rate": 0.003, "loss": 4.2065, "step": 3168 }, { "epoch": 0.03169, "grad_norm": 0.9858598226166518, "learning_rate": 0.003, "loss": 4.224, "step": 3169 }, { "epoch": 0.0317, "grad_norm": 0.7498416277122048, "learning_rate": 0.003, "loss": 4.1691, "step": 3170 }, { "epoch": 0.03171, "grad_norm": 0.6044881164445568, "learning_rate": 0.003, "loss": 4.2016, "step": 3171 }, { "epoch": 0.03172, "grad_norm": 0.6815636176834087, "learning_rate": 0.003, "loss": 4.191, "step": 3172 }, { "epoch": 0.03173, "grad_norm": 0.6654525131928357, "learning_rate": 0.003, "loss": 4.1647, "step": 3173 }, { "epoch": 0.03174, "grad_norm": 0.6474897594742762, "learning_rate": 0.003, "loss": 4.1744, "step": 3174 }, { "epoch": 0.03175, "grad_norm": 0.6418698031488217, "learning_rate": 0.003, "loss": 4.201, "step": 3175 }, { "epoch": 0.03176, "grad_norm": 0.6191051450233946, "learning_rate": 0.003, "loss": 4.1609, "step": 3176 }, { "epoch": 0.03177, "grad_norm": 0.5618627528537171, "learning_rate": 0.003, "loss": 4.1676, "step": 3177 }, { "epoch": 0.03178, "grad_norm": 0.574624051159771, "learning_rate": 0.003, "loss": 4.2133, "step": 3178 }, { "epoch": 0.03179, "grad_norm": 0.5170487183975983, "learning_rate": 0.003, "loss": 4.1718, "step": 3179 }, { "epoch": 0.0318, "grad_norm": 0.5443225922897362, "learning_rate": 0.003, "loss": 4.1734, "step": 3180 }, { "epoch": 0.03181, "grad_norm": 0.48857658279023347, "learning_rate": 0.003, "loss": 4.1797, "step": 3181 }, { "epoch": 0.03182, "grad_norm": 0.5003587458379335, "learning_rate": 0.003, "loss": 4.1768, "step": 3182 }, { "epoch": 0.03183, "grad_norm": 0.47427786201564304, "learning_rate": 0.003, "loss": 4.1791, "step": 3183 }, { "epoch": 0.03184, "grad_norm": 0.5056283615164667, "learning_rate": 0.003, "loss": 4.1534, "step": 3184 }, { "epoch": 0.03185, "grad_norm": 0.5400490284012653, "learning_rate": 0.003, "loss": 4.1741, "step": 3185 }, { "epoch": 0.03186, "grad_norm": 0.6985528632077707, "learning_rate": 0.003, "loss": 4.1767, "step": 3186 }, { "epoch": 0.03187, "grad_norm": 0.8150902063241722, "learning_rate": 0.003, "loss": 4.1448, "step": 3187 }, { "epoch": 0.03188, "grad_norm": 0.9325360176643979, "learning_rate": 0.003, "loss": 4.1849, "step": 3188 }, { "epoch": 0.03189, "grad_norm": 0.8937715656838259, "learning_rate": 0.003, "loss": 4.1792, "step": 3189 }, { "epoch": 0.0319, "grad_norm": 0.744447678504765, "learning_rate": 0.003, "loss": 4.1667, "step": 3190 }, { "epoch": 0.03191, "grad_norm": 0.6776373550333729, "learning_rate": 0.003, "loss": 4.1971, "step": 3191 }, { "epoch": 0.03192, "grad_norm": 0.6440396042092965, "learning_rate": 0.003, "loss": 4.1698, "step": 3192 }, { "epoch": 0.03193, "grad_norm": 0.6355371339608338, "learning_rate": 0.003, "loss": 4.17, "step": 3193 }, { "epoch": 0.03194, "grad_norm": 0.6540570284226682, "learning_rate": 0.003, "loss": 4.1966, "step": 3194 }, { "epoch": 0.03195, "grad_norm": 0.5776431167458079, "learning_rate": 0.003, "loss": 4.1621, "step": 3195 }, { "epoch": 0.03196, "grad_norm": 0.5567486580718531, "learning_rate": 0.003, "loss": 4.1736, "step": 3196 }, { "epoch": 0.03197, "grad_norm": 0.6211522115218998, "learning_rate": 0.003, "loss": 4.1433, "step": 3197 }, { "epoch": 0.03198, "grad_norm": 0.6754236762792072, "learning_rate": 0.003, "loss": 4.1783, "step": 3198 }, { "epoch": 0.03199, "grad_norm": 0.6909028789552336, "learning_rate": 0.003, "loss": 4.1534, "step": 3199 }, { "epoch": 0.032, "grad_norm": 0.6851130691205759, "learning_rate": 0.003, "loss": 4.1726, "step": 3200 }, { "epoch": 0.03201, "grad_norm": 0.8236929229091875, "learning_rate": 0.003, "loss": 4.1411, "step": 3201 }, { "epoch": 0.03202, "grad_norm": 0.951709446477759, "learning_rate": 0.003, "loss": 4.1883, "step": 3202 }, { "epoch": 0.03203, "grad_norm": 0.9116506118668389, "learning_rate": 0.003, "loss": 4.1713, "step": 3203 }, { "epoch": 0.03204, "grad_norm": 0.737364092313657, "learning_rate": 0.003, "loss": 4.182, "step": 3204 }, { "epoch": 0.03205, "grad_norm": 0.5733762258500861, "learning_rate": 0.003, "loss": 4.1751, "step": 3205 }, { "epoch": 0.03206, "grad_norm": 0.6520837940224375, "learning_rate": 0.003, "loss": 4.1707, "step": 3206 }, { "epoch": 0.03207, "grad_norm": 0.6713973023408082, "learning_rate": 0.003, "loss": 4.152, "step": 3207 }, { "epoch": 0.03208, "grad_norm": 0.6638942407046395, "learning_rate": 0.003, "loss": 4.1749, "step": 3208 }, { "epoch": 0.03209, "grad_norm": 0.6926100039707507, "learning_rate": 0.003, "loss": 4.2122, "step": 3209 }, { "epoch": 0.0321, "grad_norm": 0.7197536710037978, "learning_rate": 0.003, "loss": 4.1931, "step": 3210 }, { "epoch": 0.03211, "grad_norm": 0.7299656936668171, "learning_rate": 0.003, "loss": 4.1681, "step": 3211 }, { "epoch": 0.03212, "grad_norm": 0.6674134844073308, "learning_rate": 0.003, "loss": 4.1736, "step": 3212 }, { "epoch": 0.03213, "grad_norm": 0.602090480831783, "learning_rate": 0.003, "loss": 4.176, "step": 3213 }, { "epoch": 0.03214, "grad_norm": 0.5632210286186823, "learning_rate": 0.003, "loss": 4.1402, "step": 3214 }, { "epoch": 0.03215, "grad_norm": 0.6424317594884178, "learning_rate": 0.003, "loss": 4.1555, "step": 3215 }, { "epoch": 0.03216, "grad_norm": 0.6946877458250511, "learning_rate": 0.003, "loss": 4.1642, "step": 3216 }, { "epoch": 0.03217, "grad_norm": 0.7162006314166186, "learning_rate": 0.003, "loss": 4.1667, "step": 3217 }, { "epoch": 0.03218, "grad_norm": 0.6688710116110083, "learning_rate": 0.003, "loss": 4.1573, "step": 3218 }, { "epoch": 0.03219, "grad_norm": 0.6849969993386218, "learning_rate": 0.003, "loss": 4.1667, "step": 3219 }, { "epoch": 0.0322, "grad_norm": 0.6568426784434412, "learning_rate": 0.003, "loss": 4.1774, "step": 3220 }, { "epoch": 0.03221, "grad_norm": 0.7253483658822476, "learning_rate": 0.003, "loss": 4.1857, "step": 3221 }, { "epoch": 0.03222, "grad_norm": 0.793341013671458, "learning_rate": 0.003, "loss": 4.183, "step": 3222 }, { "epoch": 0.03223, "grad_norm": 0.824497677720901, "learning_rate": 0.003, "loss": 4.1862, "step": 3223 }, { "epoch": 0.03224, "grad_norm": 0.7552119583336456, "learning_rate": 0.003, "loss": 4.1463, "step": 3224 }, { "epoch": 0.03225, "grad_norm": 0.5734236212732641, "learning_rate": 0.003, "loss": 4.1756, "step": 3225 }, { "epoch": 0.03226, "grad_norm": 0.6220562562626234, "learning_rate": 0.003, "loss": 4.1558, "step": 3226 }, { "epoch": 0.03227, "grad_norm": 0.6820845746631659, "learning_rate": 0.003, "loss": 4.1783, "step": 3227 }, { "epoch": 0.03228, "grad_norm": 0.5933306652689198, "learning_rate": 0.003, "loss": 4.1516, "step": 3228 }, { "epoch": 0.03229, "grad_norm": 0.5119085736725962, "learning_rate": 0.003, "loss": 4.1681, "step": 3229 }, { "epoch": 0.0323, "grad_norm": 0.5310047730371823, "learning_rate": 0.003, "loss": 4.1594, "step": 3230 }, { "epoch": 0.03231, "grad_norm": 0.444458596160572, "learning_rate": 0.003, "loss": 4.1741, "step": 3231 }, { "epoch": 0.03232, "grad_norm": 0.48863148670767037, "learning_rate": 0.003, "loss": 4.1621, "step": 3232 }, { "epoch": 0.03233, "grad_norm": 0.5890179829718989, "learning_rate": 0.003, "loss": 4.1823, "step": 3233 }, { "epoch": 0.03234, "grad_norm": 0.6507977897435386, "learning_rate": 0.003, "loss": 4.1635, "step": 3234 }, { "epoch": 0.03235, "grad_norm": 0.6735430538160202, "learning_rate": 0.003, "loss": 4.1755, "step": 3235 }, { "epoch": 0.03236, "grad_norm": 0.6224436805872925, "learning_rate": 0.003, "loss": 4.1638, "step": 3236 }, { "epoch": 0.03237, "grad_norm": 0.6153286251420005, "learning_rate": 0.003, "loss": 4.1832, "step": 3237 }, { "epoch": 0.03238, "grad_norm": 0.5653843948515308, "learning_rate": 0.003, "loss": 4.1875, "step": 3238 }, { "epoch": 0.03239, "grad_norm": 0.5579474828935078, "learning_rate": 0.003, "loss": 4.1662, "step": 3239 }, { "epoch": 0.0324, "grad_norm": 0.5628889716808217, "learning_rate": 0.003, "loss": 4.1826, "step": 3240 }, { "epoch": 0.03241, "grad_norm": 0.618198116761749, "learning_rate": 0.003, "loss": 4.1781, "step": 3241 }, { "epoch": 0.03242, "grad_norm": 0.7766046798616605, "learning_rate": 0.003, "loss": 4.1586, "step": 3242 }, { "epoch": 0.03243, "grad_norm": 0.9785268652346966, "learning_rate": 0.003, "loss": 4.1547, "step": 3243 }, { "epoch": 0.03244, "grad_norm": 0.9580973048940927, "learning_rate": 0.003, "loss": 4.1852, "step": 3244 }, { "epoch": 0.03245, "grad_norm": 0.8576334375089313, "learning_rate": 0.003, "loss": 4.1916, "step": 3245 }, { "epoch": 0.03246, "grad_norm": 0.8423468581019681, "learning_rate": 0.003, "loss": 4.1821, "step": 3246 }, { "epoch": 0.03247, "grad_norm": 0.9544112800192335, "learning_rate": 0.003, "loss": 4.1972, "step": 3247 }, { "epoch": 0.03248, "grad_norm": 1.108974194257081, "learning_rate": 0.003, "loss": 4.2245, "step": 3248 }, { "epoch": 0.03249, "grad_norm": 0.922677655020294, "learning_rate": 0.003, "loss": 4.1848, "step": 3249 }, { "epoch": 0.0325, "grad_norm": 0.87283028555209, "learning_rate": 0.003, "loss": 4.2167, "step": 3250 }, { "epoch": 0.03251, "grad_norm": 0.9163748774315567, "learning_rate": 0.003, "loss": 4.2201, "step": 3251 }, { "epoch": 0.03252, "grad_norm": 0.81456143100221, "learning_rate": 0.003, "loss": 4.1827, "step": 3252 }, { "epoch": 0.03253, "grad_norm": 0.7968908258913222, "learning_rate": 0.003, "loss": 4.1861, "step": 3253 }, { "epoch": 0.03254, "grad_norm": 0.7235050634300025, "learning_rate": 0.003, "loss": 4.1768, "step": 3254 }, { "epoch": 0.03255, "grad_norm": 0.6266440712092289, "learning_rate": 0.003, "loss": 4.1867, "step": 3255 }, { "epoch": 0.03256, "grad_norm": 0.6449674375686789, "learning_rate": 0.003, "loss": 4.1852, "step": 3256 }, { "epoch": 0.03257, "grad_norm": 0.5622485206306796, "learning_rate": 0.003, "loss": 4.1703, "step": 3257 }, { "epoch": 0.03258, "grad_norm": 0.5825851281947257, "learning_rate": 0.003, "loss": 4.1799, "step": 3258 }, { "epoch": 0.03259, "grad_norm": 0.6444343388033511, "learning_rate": 0.003, "loss": 4.1826, "step": 3259 }, { "epoch": 0.0326, "grad_norm": 0.6989193151493163, "learning_rate": 0.003, "loss": 4.1815, "step": 3260 }, { "epoch": 0.03261, "grad_norm": 0.8302855362092207, "learning_rate": 0.003, "loss": 4.1597, "step": 3261 }, { "epoch": 0.03262, "grad_norm": 0.9498523114829631, "learning_rate": 0.003, "loss": 4.1887, "step": 3262 }, { "epoch": 0.03263, "grad_norm": 0.983071934935445, "learning_rate": 0.003, "loss": 4.2007, "step": 3263 }, { "epoch": 0.03264, "grad_norm": 0.8331337315585593, "learning_rate": 0.003, "loss": 4.208, "step": 3264 }, { "epoch": 0.03265, "grad_norm": 0.7498656994034634, "learning_rate": 0.003, "loss": 4.2003, "step": 3265 }, { "epoch": 0.03266, "grad_norm": 0.7807040232610372, "learning_rate": 0.003, "loss": 4.1549, "step": 3266 }, { "epoch": 0.03267, "grad_norm": 0.8713471135209633, "learning_rate": 0.003, "loss": 4.1715, "step": 3267 }, { "epoch": 0.03268, "grad_norm": 0.8995394972894192, "learning_rate": 0.003, "loss": 4.1612, "step": 3268 }, { "epoch": 0.03269, "grad_norm": 0.7799010442197561, "learning_rate": 0.003, "loss": 4.1875, "step": 3269 }, { "epoch": 0.0327, "grad_norm": 0.6333137664038297, "learning_rate": 0.003, "loss": 4.2075, "step": 3270 }, { "epoch": 0.03271, "grad_norm": 0.5431600237872565, "learning_rate": 0.003, "loss": 4.1673, "step": 3271 }, { "epoch": 0.03272, "grad_norm": 0.5431026489790501, "learning_rate": 0.003, "loss": 4.1751, "step": 3272 }, { "epoch": 0.03273, "grad_norm": 0.45854609240948824, "learning_rate": 0.003, "loss": 4.1729, "step": 3273 }, { "epoch": 0.03274, "grad_norm": 0.44688508931158466, "learning_rate": 0.003, "loss": 4.1868, "step": 3274 }, { "epoch": 0.03275, "grad_norm": 0.45373894096093864, "learning_rate": 0.003, "loss": 4.1852, "step": 3275 }, { "epoch": 0.03276, "grad_norm": 0.41006589936140064, "learning_rate": 0.003, "loss": 4.1613, "step": 3276 }, { "epoch": 0.03277, "grad_norm": 0.38524925786232783, "learning_rate": 0.003, "loss": 4.1604, "step": 3277 }, { "epoch": 0.03278, "grad_norm": 0.35950981826812556, "learning_rate": 0.003, "loss": 4.1675, "step": 3278 }, { "epoch": 0.03279, "grad_norm": 0.38292545999031286, "learning_rate": 0.003, "loss": 4.155, "step": 3279 }, { "epoch": 0.0328, "grad_norm": 0.4242164622322683, "learning_rate": 0.003, "loss": 4.1405, "step": 3280 }, { "epoch": 0.03281, "grad_norm": 0.488691641648266, "learning_rate": 0.003, "loss": 4.1888, "step": 3281 }, { "epoch": 0.03282, "grad_norm": 0.5739177249611346, "learning_rate": 0.003, "loss": 4.1447, "step": 3282 }, { "epoch": 0.03283, "grad_norm": 0.554048169102402, "learning_rate": 0.003, "loss": 4.1403, "step": 3283 }, { "epoch": 0.03284, "grad_norm": 0.4915028252102749, "learning_rate": 0.003, "loss": 4.1816, "step": 3284 }, { "epoch": 0.03285, "grad_norm": 0.5580359822634506, "learning_rate": 0.003, "loss": 4.1293, "step": 3285 }, { "epoch": 0.03286, "grad_norm": 0.6079781410505047, "learning_rate": 0.003, "loss": 4.1723, "step": 3286 }, { "epoch": 0.03287, "grad_norm": 0.5821835778978477, "learning_rate": 0.003, "loss": 4.176, "step": 3287 }, { "epoch": 0.03288, "grad_norm": 0.5438338131339984, "learning_rate": 0.003, "loss": 4.1565, "step": 3288 }, { "epoch": 0.03289, "grad_norm": 0.570468604964685, "learning_rate": 0.003, "loss": 4.1609, "step": 3289 }, { "epoch": 0.0329, "grad_norm": 0.6096925395272201, "learning_rate": 0.003, "loss": 4.1931, "step": 3290 }, { "epoch": 0.03291, "grad_norm": 0.7214109941856076, "learning_rate": 0.003, "loss": 4.1807, "step": 3291 }, { "epoch": 0.03292, "grad_norm": 0.9305508295977749, "learning_rate": 0.003, "loss": 4.1568, "step": 3292 }, { "epoch": 0.03293, "grad_norm": 0.9205488988397595, "learning_rate": 0.003, "loss": 4.1782, "step": 3293 }, { "epoch": 0.03294, "grad_norm": 0.8983261374723139, "learning_rate": 0.003, "loss": 4.1833, "step": 3294 }, { "epoch": 0.03295, "grad_norm": 0.9704372414132879, "learning_rate": 0.003, "loss": 4.1739, "step": 3295 }, { "epoch": 0.03296, "grad_norm": 1.4039738619774527, "learning_rate": 0.003, "loss": 4.1989, "step": 3296 }, { "epoch": 0.03297, "grad_norm": 0.8478482918673438, "learning_rate": 0.003, "loss": 4.188, "step": 3297 }, { "epoch": 0.03298, "grad_norm": 0.8625233369451641, "learning_rate": 0.003, "loss": 4.2085, "step": 3298 }, { "epoch": 0.03299, "grad_norm": 0.946434504765274, "learning_rate": 0.003, "loss": 4.201, "step": 3299 }, { "epoch": 0.033, "grad_norm": 0.9408833966323229, "learning_rate": 0.003, "loss": 4.1893, "step": 3300 }, { "epoch": 0.03301, "grad_norm": 1.0399735000004064, "learning_rate": 0.003, "loss": 4.2331, "step": 3301 }, { "epoch": 0.03302, "grad_norm": 1.060889182219052, "learning_rate": 0.003, "loss": 4.2111, "step": 3302 }, { "epoch": 0.03303, "grad_norm": 0.910480011898513, "learning_rate": 0.003, "loss": 4.2117, "step": 3303 }, { "epoch": 0.03304, "grad_norm": 0.9051761770071215, "learning_rate": 0.003, "loss": 4.2061, "step": 3304 }, { "epoch": 0.03305, "grad_norm": 0.8593496427676475, "learning_rate": 0.003, "loss": 4.2046, "step": 3305 }, { "epoch": 0.03306, "grad_norm": 0.788406787032699, "learning_rate": 0.003, "loss": 4.2208, "step": 3306 }, { "epoch": 0.03307, "grad_norm": 0.6264143830894844, "learning_rate": 0.003, "loss": 4.2132, "step": 3307 }, { "epoch": 0.03308, "grad_norm": 0.5647145644735359, "learning_rate": 0.003, "loss": 4.1897, "step": 3308 }, { "epoch": 0.03309, "grad_norm": 0.5464103710288922, "learning_rate": 0.003, "loss": 4.1758, "step": 3309 }, { "epoch": 0.0331, "grad_norm": 0.4958683830756014, "learning_rate": 0.003, "loss": 4.1767, "step": 3310 }, { "epoch": 0.03311, "grad_norm": 0.4522879146976332, "learning_rate": 0.003, "loss": 4.1941, "step": 3311 }, { "epoch": 0.03312, "grad_norm": 0.4414886343825775, "learning_rate": 0.003, "loss": 4.1614, "step": 3312 }, { "epoch": 0.03313, "grad_norm": 0.4255185332999733, "learning_rate": 0.003, "loss": 4.1887, "step": 3313 }, { "epoch": 0.03314, "grad_norm": 0.4381607158366914, "learning_rate": 0.003, "loss": 4.1299, "step": 3314 }, { "epoch": 0.03315, "grad_norm": 0.42262937541001916, "learning_rate": 0.003, "loss": 4.1816, "step": 3315 }, { "epoch": 0.03316, "grad_norm": 0.4970426554871505, "learning_rate": 0.003, "loss": 4.1826, "step": 3316 }, { "epoch": 0.03317, "grad_norm": 0.5627776265011807, "learning_rate": 0.003, "loss": 4.1717, "step": 3317 }, { "epoch": 0.03318, "grad_norm": 0.6893835679497516, "learning_rate": 0.003, "loss": 4.1564, "step": 3318 }, { "epoch": 0.03319, "grad_norm": 0.7468347673987352, "learning_rate": 0.003, "loss": 4.1531, "step": 3319 }, { "epoch": 0.0332, "grad_norm": 0.8688918354050132, "learning_rate": 0.003, "loss": 4.1819, "step": 3320 }, { "epoch": 0.03321, "grad_norm": 0.9441155117884158, "learning_rate": 0.003, "loss": 4.2001, "step": 3321 }, { "epoch": 0.03322, "grad_norm": 0.8347403031059345, "learning_rate": 0.003, "loss": 4.1519, "step": 3322 }, { "epoch": 0.03323, "grad_norm": 0.7804052129413228, "learning_rate": 0.003, "loss": 4.1955, "step": 3323 }, { "epoch": 0.03324, "grad_norm": 0.7276937121862084, "learning_rate": 0.003, "loss": 4.1636, "step": 3324 }, { "epoch": 0.03325, "grad_norm": 0.7364714952754682, "learning_rate": 0.003, "loss": 4.1961, "step": 3325 }, { "epoch": 0.03326, "grad_norm": 0.6138475460467973, "learning_rate": 0.003, "loss": 4.1513, "step": 3326 }, { "epoch": 0.03327, "grad_norm": 0.5484242765422025, "learning_rate": 0.003, "loss": 4.1774, "step": 3327 }, { "epoch": 0.03328, "grad_norm": 0.5021334597809965, "learning_rate": 0.003, "loss": 4.1949, "step": 3328 }, { "epoch": 0.03329, "grad_norm": 0.531052392624268, "learning_rate": 0.003, "loss": 4.1523, "step": 3329 }, { "epoch": 0.0333, "grad_norm": 0.5172636277516343, "learning_rate": 0.003, "loss": 4.1562, "step": 3330 }, { "epoch": 0.03331, "grad_norm": 0.5347276948931869, "learning_rate": 0.003, "loss": 4.1547, "step": 3331 }, { "epoch": 0.03332, "grad_norm": 0.5266769298617034, "learning_rate": 0.003, "loss": 4.1514, "step": 3332 }, { "epoch": 0.03333, "grad_norm": 0.5119624308137339, "learning_rate": 0.003, "loss": 4.1366, "step": 3333 }, { "epoch": 0.03334, "grad_norm": 0.5227592678561254, "learning_rate": 0.003, "loss": 4.1672, "step": 3334 }, { "epoch": 0.03335, "grad_norm": 0.5475154089666096, "learning_rate": 0.003, "loss": 4.1732, "step": 3335 }, { "epoch": 0.03336, "grad_norm": 0.6301047632403133, "learning_rate": 0.003, "loss": 4.1763, "step": 3336 }, { "epoch": 0.03337, "grad_norm": 0.8194265063356702, "learning_rate": 0.003, "loss": 4.1621, "step": 3337 }, { "epoch": 0.03338, "grad_norm": 1.1445574176004276, "learning_rate": 0.003, "loss": 4.1827, "step": 3338 }, { "epoch": 0.03339, "grad_norm": 0.8189097928363258, "learning_rate": 0.003, "loss": 4.1787, "step": 3339 }, { "epoch": 0.0334, "grad_norm": 0.660449401420429, "learning_rate": 0.003, "loss": 4.1521, "step": 3340 }, { "epoch": 0.03341, "grad_norm": 0.7754430989891157, "learning_rate": 0.003, "loss": 4.1764, "step": 3341 }, { "epoch": 0.03342, "grad_norm": 0.8792442379704091, "learning_rate": 0.003, "loss": 4.1663, "step": 3342 }, { "epoch": 0.03343, "grad_norm": 0.8167670859904461, "learning_rate": 0.003, "loss": 4.1402, "step": 3343 }, { "epoch": 0.03344, "grad_norm": 0.7480631734050127, "learning_rate": 0.003, "loss": 4.1936, "step": 3344 }, { "epoch": 0.03345, "grad_norm": 0.6496750703892737, "learning_rate": 0.003, "loss": 4.1695, "step": 3345 }, { "epoch": 0.03346, "grad_norm": 0.6081719260800688, "learning_rate": 0.003, "loss": 4.1825, "step": 3346 }, { "epoch": 0.03347, "grad_norm": 0.6721063598583025, "learning_rate": 0.003, "loss": 4.168, "step": 3347 }, { "epoch": 0.03348, "grad_norm": 0.6283159116392718, "learning_rate": 0.003, "loss": 4.1561, "step": 3348 }, { "epoch": 0.03349, "grad_norm": 0.5798454168131916, "learning_rate": 0.003, "loss": 4.2006, "step": 3349 }, { "epoch": 0.0335, "grad_norm": 0.588557434396112, "learning_rate": 0.003, "loss": 4.1558, "step": 3350 }, { "epoch": 0.03351, "grad_norm": 0.572435380329449, "learning_rate": 0.003, "loss": 4.1639, "step": 3351 }, { "epoch": 0.03352, "grad_norm": 0.5946129998027531, "learning_rate": 0.003, "loss": 4.1756, "step": 3352 }, { "epoch": 0.03353, "grad_norm": 0.6650155372083263, "learning_rate": 0.003, "loss": 4.1783, "step": 3353 }, { "epoch": 0.03354, "grad_norm": 0.6819620362483191, "learning_rate": 0.003, "loss": 4.1426, "step": 3354 }, { "epoch": 0.03355, "grad_norm": 0.7239397479894265, "learning_rate": 0.003, "loss": 4.1701, "step": 3355 }, { "epoch": 0.03356, "grad_norm": 0.7696928848130231, "learning_rate": 0.003, "loss": 4.197, "step": 3356 }, { "epoch": 0.03357, "grad_norm": 0.6661695560767795, "learning_rate": 0.003, "loss": 4.1658, "step": 3357 }, { "epoch": 0.03358, "grad_norm": 0.6123082718814552, "learning_rate": 0.003, "loss": 4.1823, "step": 3358 }, { "epoch": 0.03359, "grad_norm": 0.770393140263048, "learning_rate": 0.003, "loss": 4.1771, "step": 3359 }, { "epoch": 0.0336, "grad_norm": 0.8303005904862942, "learning_rate": 0.003, "loss": 4.1911, "step": 3360 }, { "epoch": 0.03361, "grad_norm": 0.9034060736336073, "learning_rate": 0.003, "loss": 4.1953, "step": 3361 }, { "epoch": 0.03362, "grad_norm": 0.8488627855760285, "learning_rate": 0.003, "loss": 4.1936, "step": 3362 }, { "epoch": 0.03363, "grad_norm": 0.8500592898597722, "learning_rate": 0.003, "loss": 4.1482, "step": 3363 }, { "epoch": 0.03364, "grad_norm": 0.8053495290807956, "learning_rate": 0.003, "loss": 4.167, "step": 3364 }, { "epoch": 0.03365, "grad_norm": 0.6918565332600101, "learning_rate": 0.003, "loss": 4.1581, "step": 3365 }, { "epoch": 0.03366, "grad_norm": 0.6631827092506345, "learning_rate": 0.003, "loss": 4.1325, "step": 3366 }, { "epoch": 0.03367, "grad_norm": 0.5898333237323294, "learning_rate": 0.003, "loss": 4.1467, "step": 3367 }, { "epoch": 0.03368, "grad_norm": 0.5105692997315376, "learning_rate": 0.003, "loss": 4.1944, "step": 3368 }, { "epoch": 0.03369, "grad_norm": 0.4833179204197231, "learning_rate": 0.003, "loss": 4.1504, "step": 3369 }, { "epoch": 0.0337, "grad_norm": 0.4434936266579207, "learning_rate": 0.003, "loss": 4.1827, "step": 3370 }, { "epoch": 0.03371, "grad_norm": 0.43752326668000696, "learning_rate": 0.003, "loss": 4.1182, "step": 3371 }, { "epoch": 0.03372, "grad_norm": 0.5212167164579171, "learning_rate": 0.003, "loss": 4.1472, "step": 3372 }, { "epoch": 0.03373, "grad_norm": 0.5782660861068077, "learning_rate": 0.003, "loss": 4.1812, "step": 3373 }, { "epoch": 0.03374, "grad_norm": 0.6248054334937597, "learning_rate": 0.003, "loss": 4.1659, "step": 3374 }, { "epoch": 0.03375, "grad_norm": 0.5656089592222998, "learning_rate": 0.003, "loss": 4.1486, "step": 3375 }, { "epoch": 0.03376, "grad_norm": 0.533250430480493, "learning_rate": 0.003, "loss": 4.1263, "step": 3376 }, { "epoch": 0.03377, "grad_norm": 0.6893576110724466, "learning_rate": 0.003, "loss": 4.1726, "step": 3377 }, { "epoch": 0.03378, "grad_norm": 0.8724259510840375, "learning_rate": 0.003, "loss": 4.1825, "step": 3378 }, { "epoch": 0.03379, "grad_norm": 1.0969642859125113, "learning_rate": 0.003, "loss": 4.1661, "step": 3379 }, { "epoch": 0.0338, "grad_norm": 0.9222105012112505, "learning_rate": 0.003, "loss": 4.1929, "step": 3380 }, { "epoch": 0.03381, "grad_norm": 0.6636117483340874, "learning_rate": 0.003, "loss": 4.1858, "step": 3381 }, { "epoch": 0.03382, "grad_norm": 0.6448818843218406, "learning_rate": 0.003, "loss": 4.1756, "step": 3382 }, { "epoch": 0.03383, "grad_norm": 0.7558871316698539, "learning_rate": 0.003, "loss": 4.1454, "step": 3383 }, { "epoch": 0.03384, "grad_norm": 0.784895312607238, "learning_rate": 0.003, "loss": 4.17, "step": 3384 }, { "epoch": 0.03385, "grad_norm": 0.7068746569705847, "learning_rate": 0.003, "loss": 4.1685, "step": 3385 }, { "epoch": 0.03386, "grad_norm": 0.5763966935533144, "learning_rate": 0.003, "loss": 4.1617, "step": 3386 }, { "epoch": 0.03387, "grad_norm": 0.5637260656795919, "learning_rate": 0.003, "loss": 4.1642, "step": 3387 }, { "epoch": 0.03388, "grad_norm": 0.5764694412310111, "learning_rate": 0.003, "loss": 4.1757, "step": 3388 }, { "epoch": 0.03389, "grad_norm": 0.5063857748030218, "learning_rate": 0.003, "loss": 4.1917, "step": 3389 }, { "epoch": 0.0339, "grad_norm": 0.48268849043201856, "learning_rate": 0.003, "loss": 4.1773, "step": 3390 }, { "epoch": 0.03391, "grad_norm": 0.46369864578553305, "learning_rate": 0.003, "loss": 4.1547, "step": 3391 }, { "epoch": 0.03392, "grad_norm": 0.44960224345626854, "learning_rate": 0.003, "loss": 4.1611, "step": 3392 }, { "epoch": 0.03393, "grad_norm": 0.4909883361811901, "learning_rate": 0.003, "loss": 4.1707, "step": 3393 }, { "epoch": 0.03394, "grad_norm": 0.6259272860691563, "learning_rate": 0.003, "loss": 4.1742, "step": 3394 }, { "epoch": 0.03395, "grad_norm": 0.7506927184483131, "learning_rate": 0.003, "loss": 4.1835, "step": 3395 }, { "epoch": 0.03396, "grad_norm": 0.9370743209078207, "learning_rate": 0.003, "loss": 4.1713, "step": 3396 }, { "epoch": 0.03397, "grad_norm": 1.1783116967146359, "learning_rate": 0.003, "loss": 4.1545, "step": 3397 }, { "epoch": 0.03398, "grad_norm": 0.8089632724306486, "learning_rate": 0.003, "loss": 4.1941, "step": 3398 }, { "epoch": 0.03399, "grad_norm": 0.7596909526208954, "learning_rate": 0.003, "loss": 4.2163, "step": 3399 }, { "epoch": 0.034, "grad_norm": 0.8562517296822207, "learning_rate": 0.003, "loss": 4.2009, "step": 3400 }, { "epoch": 0.03401, "grad_norm": 1.0068688368658953, "learning_rate": 0.003, "loss": 4.188, "step": 3401 }, { "epoch": 0.03402, "grad_norm": 0.99343979847827, "learning_rate": 0.003, "loss": 4.2136, "step": 3402 }, { "epoch": 0.03403, "grad_norm": 1.1467610890884863, "learning_rate": 0.003, "loss": 4.1901, "step": 3403 }, { "epoch": 0.03404, "grad_norm": 1.0385468087744694, "learning_rate": 0.003, "loss": 4.2248, "step": 3404 }, { "epoch": 0.03405, "grad_norm": 0.9754588896396058, "learning_rate": 0.003, "loss": 4.2094, "step": 3405 }, { "epoch": 0.03406, "grad_norm": 1.0318386962627828, "learning_rate": 0.003, "loss": 4.193, "step": 3406 }, { "epoch": 0.03407, "grad_norm": 1.0407604591460764, "learning_rate": 0.003, "loss": 4.2114, "step": 3407 }, { "epoch": 0.03408, "grad_norm": 0.9991368525042554, "learning_rate": 0.003, "loss": 4.1916, "step": 3408 }, { "epoch": 0.03409, "grad_norm": 0.9201374434882152, "learning_rate": 0.003, "loss": 4.2017, "step": 3409 }, { "epoch": 0.0341, "grad_norm": 0.7897132061669699, "learning_rate": 0.003, "loss": 4.1919, "step": 3410 }, { "epoch": 0.03411, "grad_norm": 0.8111203199333381, "learning_rate": 0.003, "loss": 4.2024, "step": 3411 }, { "epoch": 0.03412, "grad_norm": 0.7752208914216027, "learning_rate": 0.003, "loss": 4.219, "step": 3412 }, { "epoch": 0.03413, "grad_norm": 0.8018027400540946, "learning_rate": 0.003, "loss": 4.2281, "step": 3413 }, { "epoch": 0.03414, "grad_norm": 0.7248771456875713, "learning_rate": 0.003, "loss": 4.2139, "step": 3414 }, { "epoch": 0.03415, "grad_norm": 0.6901233244425752, "learning_rate": 0.003, "loss": 4.1927, "step": 3415 }, { "epoch": 0.03416, "grad_norm": 0.6847318566109114, "learning_rate": 0.003, "loss": 4.1943, "step": 3416 }, { "epoch": 0.03417, "grad_norm": 0.6799194082415111, "learning_rate": 0.003, "loss": 4.1883, "step": 3417 }, { "epoch": 0.03418, "grad_norm": 0.6775692861547594, "learning_rate": 0.003, "loss": 4.1576, "step": 3418 }, { "epoch": 0.03419, "grad_norm": 0.6946056576603092, "learning_rate": 0.003, "loss": 4.1802, "step": 3419 }, { "epoch": 0.0342, "grad_norm": 0.7015136907830581, "learning_rate": 0.003, "loss": 4.1882, "step": 3420 }, { "epoch": 0.03421, "grad_norm": 0.8322618875343657, "learning_rate": 0.003, "loss": 4.184, "step": 3421 }, { "epoch": 0.03422, "grad_norm": 0.877410615683893, "learning_rate": 0.003, "loss": 4.2019, "step": 3422 }, { "epoch": 0.03423, "grad_norm": 0.7815901156339347, "learning_rate": 0.003, "loss": 4.18, "step": 3423 }, { "epoch": 0.03424, "grad_norm": 0.7461859372495792, "learning_rate": 0.003, "loss": 4.2038, "step": 3424 }, { "epoch": 0.03425, "grad_norm": 0.6612154502175626, "learning_rate": 0.003, "loss": 4.1654, "step": 3425 }, { "epoch": 0.03426, "grad_norm": 0.5350775915801592, "learning_rate": 0.003, "loss": 4.182, "step": 3426 }, { "epoch": 0.03427, "grad_norm": 0.5758126205992435, "learning_rate": 0.003, "loss": 4.1525, "step": 3427 }, { "epoch": 0.03428, "grad_norm": 0.57747553400605, "learning_rate": 0.003, "loss": 4.151, "step": 3428 }, { "epoch": 0.03429, "grad_norm": 0.6495727211029925, "learning_rate": 0.003, "loss": 4.1682, "step": 3429 }, { "epoch": 0.0343, "grad_norm": 0.6970333948744754, "learning_rate": 0.003, "loss": 4.1638, "step": 3430 }, { "epoch": 0.03431, "grad_norm": 0.7157526291407539, "learning_rate": 0.003, "loss": 4.1584, "step": 3431 }, { "epoch": 0.03432, "grad_norm": 0.6010910249067187, "learning_rate": 0.003, "loss": 4.163, "step": 3432 }, { "epoch": 0.03433, "grad_norm": 0.42007320422820055, "learning_rate": 0.003, "loss": 4.13, "step": 3433 }, { "epoch": 0.03434, "grad_norm": 0.47622504855038245, "learning_rate": 0.003, "loss": 4.153, "step": 3434 }, { "epoch": 0.03435, "grad_norm": 0.5574984023909559, "learning_rate": 0.003, "loss": 4.1379, "step": 3435 }, { "epoch": 0.03436, "grad_norm": 0.6117380821653216, "learning_rate": 0.003, "loss": 4.1654, "step": 3436 }, { "epoch": 0.03437, "grad_norm": 0.5586731673929375, "learning_rate": 0.003, "loss": 4.1549, "step": 3437 }, { "epoch": 0.03438, "grad_norm": 0.41091865924021914, "learning_rate": 0.003, "loss": 4.1663, "step": 3438 }, { "epoch": 0.03439, "grad_norm": 0.3844506479299855, "learning_rate": 0.003, "loss": 4.1632, "step": 3439 }, { "epoch": 0.0344, "grad_norm": 0.47026248688155026, "learning_rate": 0.003, "loss": 4.1395, "step": 3440 }, { "epoch": 0.03441, "grad_norm": 0.49960141515204526, "learning_rate": 0.003, "loss": 4.1602, "step": 3441 }, { "epoch": 0.03442, "grad_norm": 0.48160504106231705, "learning_rate": 0.003, "loss": 4.1263, "step": 3442 }, { "epoch": 0.03443, "grad_norm": 0.4503151278381613, "learning_rate": 0.003, "loss": 4.1555, "step": 3443 }, { "epoch": 0.03444, "grad_norm": 0.4618551136735495, "learning_rate": 0.003, "loss": 4.1663, "step": 3444 }, { "epoch": 0.03445, "grad_norm": 0.5002612465674857, "learning_rate": 0.003, "loss": 4.1323, "step": 3445 }, { "epoch": 0.03446, "grad_norm": 0.49371947051027193, "learning_rate": 0.003, "loss": 4.1213, "step": 3446 }, { "epoch": 0.03447, "grad_norm": 0.48167624788075264, "learning_rate": 0.003, "loss": 4.1697, "step": 3447 }, { "epoch": 0.03448, "grad_norm": 0.48645338009833317, "learning_rate": 0.003, "loss": 4.1398, "step": 3448 }, { "epoch": 0.03449, "grad_norm": 0.42185936569719606, "learning_rate": 0.003, "loss": 4.1255, "step": 3449 }, { "epoch": 0.0345, "grad_norm": 0.39079332947086687, "learning_rate": 0.003, "loss": 4.1609, "step": 3450 }, { "epoch": 0.03451, "grad_norm": 0.42129819422128534, "learning_rate": 0.003, "loss": 4.1342, "step": 3451 }, { "epoch": 0.03452, "grad_norm": 0.5355871931302184, "learning_rate": 0.003, "loss": 4.1655, "step": 3452 }, { "epoch": 0.03453, "grad_norm": 0.7320917006665237, "learning_rate": 0.003, "loss": 4.1474, "step": 3453 }, { "epoch": 0.03454, "grad_norm": 1.1401386064666243, "learning_rate": 0.003, "loss": 4.1442, "step": 3454 }, { "epoch": 0.03455, "grad_norm": 1.05569726012347, "learning_rate": 0.003, "loss": 4.1531, "step": 3455 }, { "epoch": 0.03456, "grad_norm": 0.7753833366744568, "learning_rate": 0.003, "loss": 4.163, "step": 3456 }, { "epoch": 0.03457, "grad_norm": 0.8398952536178134, "learning_rate": 0.003, "loss": 4.1307, "step": 3457 }, { "epoch": 0.03458, "grad_norm": 0.9216186608796643, "learning_rate": 0.003, "loss": 4.1748, "step": 3458 }, { "epoch": 0.03459, "grad_norm": 0.8741651793984747, "learning_rate": 0.003, "loss": 4.1982, "step": 3459 }, { "epoch": 0.0346, "grad_norm": 0.7072954882447885, "learning_rate": 0.003, "loss": 4.1995, "step": 3460 }, { "epoch": 0.03461, "grad_norm": 0.6974866321122205, "learning_rate": 0.003, "loss": 4.1749, "step": 3461 }, { "epoch": 0.03462, "grad_norm": 0.6828331664556785, "learning_rate": 0.003, "loss": 4.1676, "step": 3462 }, { "epoch": 0.03463, "grad_norm": 0.8097332341464493, "learning_rate": 0.003, "loss": 4.1557, "step": 3463 }, { "epoch": 0.03464, "grad_norm": 0.8057895102942476, "learning_rate": 0.003, "loss": 4.1731, "step": 3464 }, { "epoch": 0.03465, "grad_norm": 0.754598199709423, "learning_rate": 0.003, "loss": 4.1797, "step": 3465 }, { "epoch": 0.03466, "grad_norm": 0.6771478949940347, "learning_rate": 0.003, "loss": 4.1778, "step": 3466 }, { "epoch": 0.03467, "grad_norm": 0.6186375156162432, "learning_rate": 0.003, "loss": 4.1574, "step": 3467 }, { "epoch": 0.03468, "grad_norm": 0.6769160235929496, "learning_rate": 0.003, "loss": 4.1742, "step": 3468 }, { "epoch": 0.03469, "grad_norm": 0.6809204256661774, "learning_rate": 0.003, "loss": 4.1707, "step": 3469 }, { "epoch": 0.0347, "grad_norm": 0.7756647140071914, "learning_rate": 0.003, "loss": 4.186, "step": 3470 }, { "epoch": 0.03471, "grad_norm": 0.8803633516066148, "learning_rate": 0.003, "loss": 4.1775, "step": 3471 }, { "epoch": 0.03472, "grad_norm": 0.8377841449731822, "learning_rate": 0.003, "loss": 4.177, "step": 3472 }, { "epoch": 0.03473, "grad_norm": 0.8583404258765982, "learning_rate": 0.003, "loss": 4.1747, "step": 3473 }, { "epoch": 0.03474, "grad_norm": 0.821362643848504, "learning_rate": 0.003, "loss": 4.2036, "step": 3474 }, { "epoch": 0.03475, "grad_norm": 0.7366543444137226, "learning_rate": 0.003, "loss": 4.171, "step": 3475 }, { "epoch": 0.03476, "grad_norm": 0.8990414035833436, "learning_rate": 0.003, "loss": 4.1854, "step": 3476 }, { "epoch": 0.03477, "grad_norm": 0.9251586453057641, "learning_rate": 0.003, "loss": 4.1836, "step": 3477 }, { "epoch": 0.03478, "grad_norm": 0.9081762714287535, "learning_rate": 0.003, "loss": 4.1933, "step": 3478 }, { "epoch": 0.03479, "grad_norm": 0.95014653125921, "learning_rate": 0.003, "loss": 4.1698, "step": 3479 }, { "epoch": 0.0348, "grad_norm": 0.8211598773714466, "learning_rate": 0.003, "loss": 4.1704, "step": 3480 }, { "epoch": 0.03481, "grad_norm": 0.6714433574061417, "learning_rate": 0.003, "loss": 4.182, "step": 3481 }, { "epoch": 0.03482, "grad_norm": 0.6624685031516976, "learning_rate": 0.003, "loss": 4.1874, "step": 3482 }, { "epoch": 0.03483, "grad_norm": 0.6364069563539299, "learning_rate": 0.003, "loss": 4.163, "step": 3483 }, { "epoch": 0.03484, "grad_norm": 0.7252638284821149, "learning_rate": 0.003, "loss": 4.2109, "step": 3484 }, { "epoch": 0.03485, "grad_norm": 0.6599203755955035, "learning_rate": 0.003, "loss": 4.2011, "step": 3485 }, { "epoch": 0.03486, "grad_norm": 0.6351455036938864, "learning_rate": 0.003, "loss": 4.1709, "step": 3486 }, { "epoch": 0.03487, "grad_norm": 0.7047689274973644, "learning_rate": 0.003, "loss": 4.1641, "step": 3487 }, { "epoch": 0.03488, "grad_norm": 0.5998971286625794, "learning_rate": 0.003, "loss": 4.1568, "step": 3488 }, { "epoch": 0.03489, "grad_norm": 0.5928637335909946, "learning_rate": 0.003, "loss": 4.1727, "step": 3489 }, { "epoch": 0.0349, "grad_norm": 0.7224065582654237, "learning_rate": 0.003, "loss": 4.1727, "step": 3490 }, { "epoch": 0.03491, "grad_norm": 0.8948866147367468, "learning_rate": 0.003, "loss": 4.1927, "step": 3491 }, { "epoch": 0.03492, "grad_norm": 1.0698897809029222, "learning_rate": 0.003, "loss": 4.1887, "step": 3492 }, { "epoch": 0.03493, "grad_norm": 0.879044396872005, "learning_rate": 0.003, "loss": 4.1775, "step": 3493 }, { "epoch": 0.03494, "grad_norm": 0.7308396541826835, "learning_rate": 0.003, "loss": 4.1738, "step": 3494 }, { "epoch": 0.03495, "grad_norm": 0.7338448975678681, "learning_rate": 0.003, "loss": 4.1823, "step": 3495 }, { "epoch": 0.03496, "grad_norm": 0.8206218724499419, "learning_rate": 0.003, "loss": 4.1564, "step": 3496 }, { "epoch": 0.03497, "grad_norm": 0.8420218799600944, "learning_rate": 0.003, "loss": 4.1943, "step": 3497 }, { "epoch": 0.03498, "grad_norm": 0.7276352170340858, "learning_rate": 0.003, "loss": 4.1624, "step": 3498 }, { "epoch": 0.03499, "grad_norm": 0.6164613363129237, "learning_rate": 0.003, "loss": 4.1637, "step": 3499 }, { "epoch": 0.035, "grad_norm": 0.5533625720370539, "learning_rate": 0.003, "loss": 4.1821, "step": 3500 }, { "epoch": 0.03501, "grad_norm": 0.5440318172984431, "learning_rate": 0.003, "loss": 4.1716, "step": 3501 }, { "epoch": 0.03502, "grad_norm": 0.570449376318866, "learning_rate": 0.003, "loss": 4.1707, "step": 3502 }, { "epoch": 0.03503, "grad_norm": 0.5938098529331512, "learning_rate": 0.003, "loss": 4.1865, "step": 3503 }, { "epoch": 0.03504, "grad_norm": 0.5807097638534694, "learning_rate": 0.003, "loss": 4.1574, "step": 3504 }, { "epoch": 0.03505, "grad_norm": 0.5346263597287685, "learning_rate": 0.003, "loss": 4.1718, "step": 3505 }, { "epoch": 0.03506, "grad_norm": 0.5906950615820623, "learning_rate": 0.003, "loss": 4.182, "step": 3506 }, { "epoch": 0.03507, "grad_norm": 0.5854291049171028, "learning_rate": 0.003, "loss": 4.1601, "step": 3507 }, { "epoch": 0.03508, "grad_norm": 0.5919853377246803, "learning_rate": 0.003, "loss": 4.1373, "step": 3508 }, { "epoch": 0.03509, "grad_norm": 0.6611088078675262, "learning_rate": 0.003, "loss": 4.1537, "step": 3509 }, { "epoch": 0.0351, "grad_norm": 0.694428361139572, "learning_rate": 0.003, "loss": 4.1446, "step": 3510 }, { "epoch": 0.03511, "grad_norm": 0.5309620036695369, "learning_rate": 0.003, "loss": 4.1557, "step": 3511 }, { "epoch": 0.03512, "grad_norm": 0.4298187490647193, "learning_rate": 0.003, "loss": 4.1416, "step": 3512 }, { "epoch": 0.03513, "grad_norm": 0.4111172909761725, "learning_rate": 0.003, "loss": 4.1448, "step": 3513 }, { "epoch": 0.03514, "grad_norm": 0.45911964934305066, "learning_rate": 0.003, "loss": 4.1562, "step": 3514 }, { "epoch": 0.03515, "grad_norm": 0.5301625577665102, "learning_rate": 0.003, "loss": 4.1654, "step": 3515 }, { "epoch": 0.03516, "grad_norm": 0.6537735331219108, "learning_rate": 0.003, "loss": 4.1947, "step": 3516 }, { "epoch": 0.03517, "grad_norm": 0.8603631139968849, "learning_rate": 0.003, "loss": 4.153, "step": 3517 }, { "epoch": 0.03518, "grad_norm": 0.9381334187285012, "learning_rate": 0.003, "loss": 4.1471, "step": 3518 }, { "epoch": 0.03519, "grad_norm": 0.7499517998636877, "learning_rate": 0.003, "loss": 4.1858, "step": 3519 }, { "epoch": 0.0352, "grad_norm": 0.6063052992661483, "learning_rate": 0.003, "loss": 4.1619, "step": 3520 }, { "epoch": 0.03521, "grad_norm": 0.6898167453059805, "learning_rate": 0.003, "loss": 4.1613, "step": 3521 }, { "epoch": 0.03522, "grad_norm": 0.7285294145828302, "learning_rate": 0.003, "loss": 4.1629, "step": 3522 }, { "epoch": 0.03523, "grad_norm": 0.786382262009744, "learning_rate": 0.003, "loss": 4.1349, "step": 3523 }, { "epoch": 0.03524, "grad_norm": 0.8485073208145252, "learning_rate": 0.003, "loss": 4.1811, "step": 3524 }, { "epoch": 0.03525, "grad_norm": 0.7884181271432541, "learning_rate": 0.003, "loss": 4.1487, "step": 3525 }, { "epoch": 0.03526, "grad_norm": 0.6895666981950603, "learning_rate": 0.003, "loss": 4.1533, "step": 3526 }, { "epoch": 0.03527, "grad_norm": 0.6609448619741584, "learning_rate": 0.003, "loss": 4.1841, "step": 3527 }, { "epoch": 0.03528, "grad_norm": 0.6552099898208482, "learning_rate": 0.003, "loss": 4.1683, "step": 3528 }, { "epoch": 0.03529, "grad_norm": 0.5657486402844111, "learning_rate": 0.003, "loss": 4.1582, "step": 3529 }, { "epoch": 0.0353, "grad_norm": 0.6271377691303136, "learning_rate": 0.003, "loss": 4.1603, "step": 3530 }, { "epoch": 0.03531, "grad_norm": 0.6215946372415877, "learning_rate": 0.003, "loss": 4.1691, "step": 3531 }, { "epoch": 0.03532, "grad_norm": 0.6387289961881946, "learning_rate": 0.003, "loss": 4.1723, "step": 3532 }, { "epoch": 0.03533, "grad_norm": 0.6872453476711173, "learning_rate": 0.003, "loss": 4.1733, "step": 3533 }, { "epoch": 0.03534, "grad_norm": 0.7783886281484835, "learning_rate": 0.003, "loss": 4.1711, "step": 3534 }, { "epoch": 0.03535, "grad_norm": 0.8725146995031193, "learning_rate": 0.003, "loss": 4.1506, "step": 3535 }, { "epoch": 0.03536, "grad_norm": 0.9246122970504058, "learning_rate": 0.003, "loss": 4.1551, "step": 3536 }, { "epoch": 0.03537, "grad_norm": 1.065477970890386, "learning_rate": 0.003, "loss": 4.1712, "step": 3537 }, { "epoch": 0.03538, "grad_norm": 0.9594169294264835, "learning_rate": 0.003, "loss": 4.1706, "step": 3538 }, { "epoch": 0.03539, "grad_norm": 0.8330662665517189, "learning_rate": 0.003, "loss": 4.1586, "step": 3539 }, { "epoch": 0.0354, "grad_norm": 0.8526080452896662, "learning_rate": 0.003, "loss": 4.2005, "step": 3540 }, { "epoch": 0.03541, "grad_norm": 0.9085329983346553, "learning_rate": 0.003, "loss": 4.1886, "step": 3541 }, { "epoch": 0.03542, "grad_norm": 0.833985994066796, "learning_rate": 0.003, "loss": 4.1702, "step": 3542 }, { "epoch": 0.03543, "grad_norm": 0.7626821688858162, "learning_rate": 0.003, "loss": 4.1763, "step": 3543 }, { "epoch": 0.03544, "grad_norm": 0.806738250706975, "learning_rate": 0.003, "loss": 4.1843, "step": 3544 }, { "epoch": 0.03545, "grad_norm": 0.9242341338381844, "learning_rate": 0.003, "loss": 4.1613, "step": 3545 }, { "epoch": 0.03546, "grad_norm": 0.8293267592237652, "learning_rate": 0.003, "loss": 4.1759, "step": 3546 }, { "epoch": 0.03547, "grad_norm": 0.7605341067555178, "learning_rate": 0.003, "loss": 4.1823, "step": 3547 }, { "epoch": 0.03548, "grad_norm": 0.697933140723856, "learning_rate": 0.003, "loss": 4.1877, "step": 3548 }, { "epoch": 0.03549, "grad_norm": 0.5804050326776297, "learning_rate": 0.003, "loss": 4.1739, "step": 3549 }, { "epoch": 0.0355, "grad_norm": 0.603076289815789, "learning_rate": 0.003, "loss": 4.1292, "step": 3550 }, { "epoch": 0.03551, "grad_norm": 0.5646969274817457, "learning_rate": 0.003, "loss": 4.1847, "step": 3551 }, { "epoch": 0.03552, "grad_norm": 0.5353048682624879, "learning_rate": 0.003, "loss": 4.1445, "step": 3552 }, { "epoch": 0.03553, "grad_norm": 0.43948675430004575, "learning_rate": 0.003, "loss": 4.1551, "step": 3553 }, { "epoch": 0.03554, "grad_norm": 0.4404992722313449, "learning_rate": 0.003, "loss": 4.1584, "step": 3554 }, { "epoch": 0.03555, "grad_norm": 0.4408782434286909, "learning_rate": 0.003, "loss": 4.1708, "step": 3555 }, { "epoch": 0.03556, "grad_norm": 0.4089405885332322, "learning_rate": 0.003, "loss": 4.1496, "step": 3556 }, { "epoch": 0.03557, "grad_norm": 0.4886396097558162, "learning_rate": 0.003, "loss": 4.1516, "step": 3557 }, { "epoch": 0.03558, "grad_norm": 0.5683554684886454, "learning_rate": 0.003, "loss": 4.1794, "step": 3558 }, { "epoch": 0.03559, "grad_norm": 0.7367360745457323, "learning_rate": 0.003, "loss": 4.1684, "step": 3559 }, { "epoch": 0.0356, "grad_norm": 0.8520355513833522, "learning_rate": 0.003, "loss": 4.1751, "step": 3560 }, { "epoch": 0.03561, "grad_norm": 0.8538012995310599, "learning_rate": 0.003, "loss": 4.1983, "step": 3561 }, { "epoch": 0.03562, "grad_norm": 0.6793279389206137, "learning_rate": 0.003, "loss": 4.1602, "step": 3562 }, { "epoch": 0.03563, "grad_norm": 0.5212190292484681, "learning_rate": 0.003, "loss": 4.1876, "step": 3563 }, { "epoch": 0.03564, "grad_norm": 0.6272603340476319, "learning_rate": 0.003, "loss": 4.1651, "step": 3564 }, { "epoch": 0.03565, "grad_norm": 0.7385189206717337, "learning_rate": 0.003, "loss": 4.1607, "step": 3565 }, { "epoch": 0.03566, "grad_norm": 0.6717765632340529, "learning_rate": 0.003, "loss": 4.1622, "step": 3566 }, { "epoch": 0.03567, "grad_norm": 0.6137260235822769, "learning_rate": 0.003, "loss": 4.154, "step": 3567 }, { "epoch": 0.03568, "grad_norm": 0.5473859937693163, "learning_rate": 0.003, "loss": 4.1497, "step": 3568 }, { "epoch": 0.03569, "grad_norm": 0.5260257267109332, "learning_rate": 0.003, "loss": 4.1459, "step": 3569 }, { "epoch": 0.0357, "grad_norm": 0.5464547294802432, "learning_rate": 0.003, "loss": 4.1462, "step": 3570 }, { "epoch": 0.03571, "grad_norm": 0.5170616946027875, "learning_rate": 0.003, "loss": 4.1547, "step": 3571 }, { "epoch": 0.03572, "grad_norm": 0.5245302678289541, "learning_rate": 0.003, "loss": 4.1721, "step": 3572 }, { "epoch": 0.03573, "grad_norm": 0.5539674018003774, "learning_rate": 0.003, "loss": 4.1421, "step": 3573 }, { "epoch": 0.03574, "grad_norm": 0.5717457987893073, "learning_rate": 0.003, "loss": 4.1694, "step": 3574 }, { "epoch": 0.03575, "grad_norm": 0.6597241159959504, "learning_rate": 0.003, "loss": 4.1498, "step": 3575 }, { "epoch": 0.03576, "grad_norm": 0.7987769449923495, "learning_rate": 0.003, "loss": 4.1377, "step": 3576 }, { "epoch": 0.03577, "grad_norm": 0.9166920267526417, "learning_rate": 0.003, "loss": 4.1801, "step": 3577 }, { "epoch": 0.03578, "grad_norm": 0.8840919078143189, "learning_rate": 0.003, "loss": 4.1728, "step": 3578 }, { "epoch": 0.03579, "grad_norm": 0.8228224911003803, "learning_rate": 0.003, "loss": 4.1888, "step": 3579 }, { "epoch": 0.0358, "grad_norm": 0.9419353853796917, "learning_rate": 0.003, "loss": 4.1488, "step": 3580 }, { "epoch": 0.03581, "grad_norm": 0.8980601226690328, "learning_rate": 0.003, "loss": 4.1607, "step": 3581 }, { "epoch": 0.03582, "grad_norm": 0.8072733688617605, "learning_rate": 0.003, "loss": 4.1621, "step": 3582 }, { "epoch": 0.03583, "grad_norm": 0.8230842831411875, "learning_rate": 0.003, "loss": 4.1635, "step": 3583 }, { "epoch": 0.03584, "grad_norm": 0.6285337030399905, "learning_rate": 0.003, "loss": 4.1474, "step": 3584 }, { "epoch": 0.03585, "grad_norm": 0.6220326404370078, "learning_rate": 0.003, "loss": 4.1449, "step": 3585 }, { "epoch": 0.03586, "grad_norm": 0.6861320623389772, "learning_rate": 0.003, "loss": 4.1677, "step": 3586 }, { "epoch": 0.03587, "grad_norm": 0.7015780277654053, "learning_rate": 0.003, "loss": 4.1689, "step": 3587 }, { "epoch": 0.03588, "grad_norm": 0.6838138540479132, "learning_rate": 0.003, "loss": 4.1653, "step": 3588 }, { "epoch": 0.03589, "grad_norm": 0.6735354715174068, "learning_rate": 0.003, "loss": 4.1806, "step": 3589 }, { "epoch": 0.0359, "grad_norm": 0.6330341634771458, "learning_rate": 0.003, "loss": 4.1448, "step": 3590 }, { "epoch": 0.03591, "grad_norm": 0.5910317698548738, "learning_rate": 0.003, "loss": 4.1771, "step": 3591 }, { "epoch": 0.03592, "grad_norm": 0.5618523931442762, "learning_rate": 0.003, "loss": 4.1551, "step": 3592 }, { "epoch": 0.03593, "grad_norm": 0.8151654550358379, "learning_rate": 0.003, "loss": 4.1589, "step": 3593 }, { "epoch": 0.03594, "grad_norm": 1.0948812647746626, "learning_rate": 0.003, "loss": 4.1768, "step": 3594 }, { "epoch": 0.03595, "grad_norm": 0.8469599816388995, "learning_rate": 0.003, "loss": 4.1615, "step": 3595 }, { "epoch": 0.03596, "grad_norm": 0.5860459737545368, "learning_rate": 0.003, "loss": 4.1904, "step": 3596 }, { "epoch": 0.03597, "grad_norm": 0.6216520906256218, "learning_rate": 0.003, "loss": 4.1663, "step": 3597 }, { "epoch": 0.03598, "grad_norm": 0.716220967428177, "learning_rate": 0.003, "loss": 4.1706, "step": 3598 }, { "epoch": 0.03599, "grad_norm": 0.7599051671476444, "learning_rate": 0.003, "loss": 4.1682, "step": 3599 }, { "epoch": 0.036, "grad_norm": 0.6520846019848653, "learning_rate": 0.003, "loss": 4.1898, "step": 3600 }, { "epoch": 0.03601, "grad_norm": 0.5364426998203575, "learning_rate": 0.003, "loss": 4.1587, "step": 3601 }, { "epoch": 0.03602, "grad_norm": 0.5192638145820389, "learning_rate": 0.003, "loss": 4.1402, "step": 3602 }, { "epoch": 0.03603, "grad_norm": 0.48444783985893647, "learning_rate": 0.003, "loss": 4.131, "step": 3603 }, { "epoch": 0.03604, "grad_norm": 0.44019612697059707, "learning_rate": 0.003, "loss": 4.1673, "step": 3604 }, { "epoch": 0.03605, "grad_norm": 0.46867930201266456, "learning_rate": 0.003, "loss": 4.1559, "step": 3605 }, { "epoch": 0.03606, "grad_norm": 0.4990137003463985, "learning_rate": 0.003, "loss": 4.1437, "step": 3606 }, { "epoch": 0.03607, "grad_norm": 0.6032395506240065, "learning_rate": 0.003, "loss": 4.1394, "step": 3607 }, { "epoch": 0.03608, "grad_norm": 0.6248433459385002, "learning_rate": 0.003, "loss": 4.1451, "step": 3608 }, { "epoch": 0.03609, "grad_norm": 0.6134188004656983, "learning_rate": 0.003, "loss": 4.1716, "step": 3609 }, { "epoch": 0.0361, "grad_norm": 0.6290690472912259, "learning_rate": 0.003, "loss": 4.1749, "step": 3610 }, { "epoch": 0.03611, "grad_norm": 0.72557440929179, "learning_rate": 0.003, "loss": 4.1283, "step": 3611 }, { "epoch": 0.03612, "grad_norm": 0.8893798610273862, "learning_rate": 0.003, "loss": 4.1323, "step": 3612 }, { "epoch": 0.03613, "grad_norm": 0.9668223551113271, "learning_rate": 0.003, "loss": 4.1826, "step": 3613 }, { "epoch": 0.03614, "grad_norm": 0.734407911853949, "learning_rate": 0.003, "loss": 4.1531, "step": 3614 }, { "epoch": 0.03615, "grad_norm": 0.6729100967664783, "learning_rate": 0.003, "loss": 4.1648, "step": 3615 }, { "epoch": 0.03616, "grad_norm": 0.8657549358453647, "learning_rate": 0.003, "loss": 4.177, "step": 3616 }, { "epoch": 0.03617, "grad_norm": 0.9260684291150802, "learning_rate": 0.003, "loss": 4.1798, "step": 3617 }, { "epoch": 0.03618, "grad_norm": 0.8570047461014029, "learning_rate": 0.003, "loss": 4.17, "step": 3618 }, { "epoch": 0.03619, "grad_norm": 0.866479505486592, "learning_rate": 0.003, "loss": 4.175, "step": 3619 }, { "epoch": 0.0362, "grad_norm": 0.8958378530573641, "learning_rate": 0.003, "loss": 4.1828, "step": 3620 }, { "epoch": 0.03621, "grad_norm": 0.8754496119000232, "learning_rate": 0.003, "loss": 4.1836, "step": 3621 }, { "epoch": 0.03622, "grad_norm": 0.7829921953128642, "learning_rate": 0.003, "loss": 4.1499, "step": 3622 }, { "epoch": 0.03623, "grad_norm": 0.7264263724470046, "learning_rate": 0.003, "loss": 4.1427, "step": 3623 }, { "epoch": 0.03624, "grad_norm": 0.7103073858138196, "learning_rate": 0.003, "loss": 4.1467, "step": 3624 }, { "epoch": 0.03625, "grad_norm": 0.7362483226450557, "learning_rate": 0.003, "loss": 4.1852, "step": 3625 }, { "epoch": 0.03626, "grad_norm": 0.6700204258570956, "learning_rate": 0.003, "loss": 4.1584, "step": 3626 }, { "epoch": 0.03627, "grad_norm": 0.8160965768748923, "learning_rate": 0.003, "loss": 4.1658, "step": 3627 }, { "epoch": 0.03628, "grad_norm": 0.7835131802445204, "learning_rate": 0.003, "loss": 4.1739, "step": 3628 }, { "epoch": 0.03629, "grad_norm": 0.6985149126811981, "learning_rate": 0.003, "loss": 4.1895, "step": 3629 }, { "epoch": 0.0363, "grad_norm": 0.6039041743694569, "learning_rate": 0.003, "loss": 4.1703, "step": 3630 }, { "epoch": 0.03631, "grad_norm": 0.5592412769390824, "learning_rate": 0.003, "loss": 4.165, "step": 3631 }, { "epoch": 0.03632, "grad_norm": 0.49011079854965545, "learning_rate": 0.003, "loss": 4.1538, "step": 3632 }, { "epoch": 0.03633, "grad_norm": 0.5382307200734877, "learning_rate": 0.003, "loss": 4.1853, "step": 3633 }, { "epoch": 0.03634, "grad_norm": 0.5367696632611142, "learning_rate": 0.003, "loss": 4.1594, "step": 3634 }, { "epoch": 0.03635, "grad_norm": 0.5142486579138349, "learning_rate": 0.003, "loss": 4.1415, "step": 3635 }, { "epoch": 0.03636, "grad_norm": 0.5652066346276485, "learning_rate": 0.003, "loss": 4.1613, "step": 3636 }, { "epoch": 0.03637, "grad_norm": 0.59668194695564, "learning_rate": 0.003, "loss": 4.1502, "step": 3637 }, { "epoch": 0.03638, "grad_norm": 0.6925042816123054, "learning_rate": 0.003, "loss": 4.1624, "step": 3638 }, { "epoch": 0.03639, "grad_norm": 0.8037659165365143, "learning_rate": 0.003, "loss": 4.1869, "step": 3639 }, { "epoch": 0.0364, "grad_norm": 0.8148685837655183, "learning_rate": 0.003, "loss": 4.1832, "step": 3640 }, { "epoch": 0.03641, "grad_norm": 0.7919945416520671, "learning_rate": 0.003, "loss": 4.1676, "step": 3641 }, { "epoch": 0.03642, "grad_norm": 0.8387878217923936, "learning_rate": 0.003, "loss": 4.1883, "step": 3642 }, { "epoch": 0.03643, "grad_norm": 0.7524748876313961, "learning_rate": 0.003, "loss": 4.1494, "step": 3643 }, { "epoch": 0.03644, "grad_norm": 0.7273454421844323, "learning_rate": 0.003, "loss": 4.1748, "step": 3644 }, { "epoch": 0.03645, "grad_norm": 0.7955485790365344, "learning_rate": 0.003, "loss": 4.1475, "step": 3645 }, { "epoch": 0.03646, "grad_norm": 0.7720894887238786, "learning_rate": 0.003, "loss": 4.1682, "step": 3646 }, { "epoch": 0.03647, "grad_norm": 0.7086978218067327, "learning_rate": 0.003, "loss": 4.1616, "step": 3647 }, { "epoch": 0.03648, "grad_norm": 0.6770260559623669, "learning_rate": 0.003, "loss": 4.1506, "step": 3648 }, { "epoch": 0.03649, "grad_norm": 0.5881027107624417, "learning_rate": 0.003, "loss": 4.1738, "step": 3649 }, { "epoch": 0.0365, "grad_norm": 0.6131807187241587, "learning_rate": 0.003, "loss": 4.1529, "step": 3650 }, { "epoch": 0.03651, "grad_norm": 0.594204825708316, "learning_rate": 0.003, "loss": 4.1729, "step": 3651 }, { "epoch": 0.03652, "grad_norm": 0.6399255321695406, "learning_rate": 0.003, "loss": 4.1571, "step": 3652 }, { "epoch": 0.03653, "grad_norm": 0.6119522794582246, "learning_rate": 0.003, "loss": 4.1419, "step": 3653 }, { "epoch": 0.03654, "grad_norm": 0.5566016631522781, "learning_rate": 0.003, "loss": 4.1386, "step": 3654 }, { "epoch": 0.03655, "grad_norm": 0.5193773149688491, "learning_rate": 0.003, "loss": 4.1618, "step": 3655 }, { "epoch": 0.03656, "grad_norm": 0.6264013089429903, "learning_rate": 0.003, "loss": 4.1523, "step": 3656 }, { "epoch": 0.03657, "grad_norm": 0.6979744938653328, "learning_rate": 0.003, "loss": 4.1269, "step": 3657 }, { "epoch": 0.03658, "grad_norm": 0.8558728547971467, "learning_rate": 0.003, "loss": 4.1964, "step": 3658 }, { "epoch": 0.03659, "grad_norm": 0.8839768938776124, "learning_rate": 0.003, "loss": 4.1477, "step": 3659 }, { "epoch": 0.0366, "grad_norm": 0.738686352865498, "learning_rate": 0.003, "loss": 4.1749, "step": 3660 }, { "epoch": 0.03661, "grad_norm": 0.7093891716169941, "learning_rate": 0.003, "loss": 4.1523, "step": 3661 }, { "epoch": 0.03662, "grad_norm": 0.7587081351482545, "learning_rate": 0.003, "loss": 4.1525, "step": 3662 }, { "epoch": 0.03663, "grad_norm": 0.7076929569924106, "learning_rate": 0.003, "loss": 4.169, "step": 3663 }, { "epoch": 0.03664, "grad_norm": 0.7127846105810646, "learning_rate": 0.003, "loss": 4.1482, "step": 3664 }, { "epoch": 0.03665, "grad_norm": 0.7078459190405836, "learning_rate": 0.003, "loss": 4.1741, "step": 3665 }, { "epoch": 0.03666, "grad_norm": 0.6805684369907511, "learning_rate": 0.003, "loss": 4.1297, "step": 3666 }, { "epoch": 0.03667, "grad_norm": 0.7150672278612442, "learning_rate": 0.003, "loss": 4.1398, "step": 3667 }, { "epoch": 0.03668, "grad_norm": 0.7699785342164407, "learning_rate": 0.003, "loss": 4.159, "step": 3668 }, { "epoch": 0.03669, "grad_norm": 0.7952877417083865, "learning_rate": 0.003, "loss": 4.1361, "step": 3669 }, { "epoch": 0.0367, "grad_norm": 0.7569891220037553, "learning_rate": 0.003, "loss": 4.1731, "step": 3670 }, { "epoch": 0.03671, "grad_norm": 0.7098731435624128, "learning_rate": 0.003, "loss": 4.1585, "step": 3671 }, { "epoch": 0.03672, "grad_norm": 0.6352485396363875, "learning_rate": 0.003, "loss": 4.1619, "step": 3672 }, { "epoch": 0.03673, "grad_norm": 0.6885305441098153, "learning_rate": 0.003, "loss": 4.172, "step": 3673 }, { "epoch": 0.03674, "grad_norm": 0.7059131456483599, "learning_rate": 0.003, "loss": 4.1377, "step": 3674 }, { "epoch": 0.03675, "grad_norm": 0.6038823828670427, "learning_rate": 0.003, "loss": 4.1744, "step": 3675 }, { "epoch": 0.03676, "grad_norm": 0.57222994957781, "learning_rate": 0.003, "loss": 4.1319, "step": 3676 }, { "epoch": 0.03677, "grad_norm": 0.6802347986469087, "learning_rate": 0.003, "loss": 4.1492, "step": 3677 }, { "epoch": 0.03678, "grad_norm": 0.7740620961238671, "learning_rate": 0.003, "loss": 4.1812, "step": 3678 }, { "epoch": 0.03679, "grad_norm": 0.8635840591832115, "learning_rate": 0.003, "loss": 4.1775, "step": 3679 }, { "epoch": 0.0368, "grad_norm": 0.9019802875931285, "learning_rate": 0.003, "loss": 4.173, "step": 3680 }, { "epoch": 0.03681, "grad_norm": 0.9372442716730703, "learning_rate": 0.003, "loss": 4.1511, "step": 3681 }, { "epoch": 0.03682, "grad_norm": 0.9892266156518691, "learning_rate": 0.003, "loss": 4.1916, "step": 3682 }, { "epoch": 0.03683, "grad_norm": 1.1018562984300873, "learning_rate": 0.003, "loss": 4.1632, "step": 3683 }, { "epoch": 0.03684, "grad_norm": 0.9775119392439015, "learning_rate": 0.003, "loss": 4.1875, "step": 3684 }, { "epoch": 0.03685, "grad_norm": 0.9024302815079079, "learning_rate": 0.003, "loss": 4.1723, "step": 3685 }, { "epoch": 0.03686, "grad_norm": 0.9419765899805256, "learning_rate": 0.003, "loss": 4.1901, "step": 3686 }, { "epoch": 0.03687, "grad_norm": 1.1488204202771457, "learning_rate": 0.003, "loss": 4.2226, "step": 3687 }, { "epoch": 0.03688, "grad_norm": 0.9052265907645448, "learning_rate": 0.003, "loss": 4.2006, "step": 3688 }, { "epoch": 0.03689, "grad_norm": 0.8161052846618367, "learning_rate": 0.003, "loss": 4.1667, "step": 3689 }, { "epoch": 0.0369, "grad_norm": 0.8630363350313656, "learning_rate": 0.003, "loss": 4.1995, "step": 3690 }, { "epoch": 0.03691, "grad_norm": 1.000347066652698, "learning_rate": 0.003, "loss": 4.1833, "step": 3691 }, { "epoch": 0.03692, "grad_norm": 1.0125314193391763, "learning_rate": 0.003, "loss": 4.2237, "step": 3692 }, { "epoch": 0.03693, "grad_norm": 0.7792710807207066, "learning_rate": 0.003, "loss": 4.187, "step": 3693 }, { "epoch": 0.03694, "grad_norm": 0.6567620211689106, "learning_rate": 0.003, "loss": 4.1772, "step": 3694 }, { "epoch": 0.03695, "grad_norm": 0.6069200650214434, "learning_rate": 0.003, "loss": 4.1749, "step": 3695 }, { "epoch": 0.03696, "grad_norm": 0.6245228527312463, "learning_rate": 0.003, "loss": 4.169, "step": 3696 }, { "epoch": 0.03697, "grad_norm": 0.5956476153449444, "learning_rate": 0.003, "loss": 4.1568, "step": 3697 }, { "epoch": 0.03698, "grad_norm": 0.6172146239896532, "learning_rate": 0.003, "loss": 4.1911, "step": 3698 }, { "epoch": 0.03699, "grad_norm": 0.6369440678819488, "learning_rate": 0.003, "loss": 4.1784, "step": 3699 }, { "epoch": 0.037, "grad_norm": 0.7397659340274994, "learning_rate": 0.003, "loss": 4.1703, "step": 3700 }, { "epoch": 0.03701, "grad_norm": 0.7400755144798878, "learning_rate": 0.003, "loss": 4.1708, "step": 3701 }, { "epoch": 0.03702, "grad_norm": 0.6011624195612609, "learning_rate": 0.003, "loss": 4.1568, "step": 3702 }, { "epoch": 0.03703, "grad_norm": 0.45688971739502526, "learning_rate": 0.003, "loss": 4.1658, "step": 3703 }, { "epoch": 0.03704, "grad_norm": 0.4625467744385196, "learning_rate": 0.003, "loss": 4.1661, "step": 3704 }, { "epoch": 0.03705, "grad_norm": 0.4601609886250813, "learning_rate": 0.003, "loss": 4.1799, "step": 3705 }, { "epoch": 0.03706, "grad_norm": 0.4966406347936221, "learning_rate": 0.003, "loss": 4.1635, "step": 3706 }, { "epoch": 0.03707, "grad_norm": 0.42477832568061835, "learning_rate": 0.003, "loss": 4.142, "step": 3707 }, { "epoch": 0.03708, "grad_norm": 0.40682526312634587, "learning_rate": 0.003, "loss": 4.1616, "step": 3708 }, { "epoch": 0.03709, "grad_norm": 0.4133271539515952, "learning_rate": 0.003, "loss": 4.1372, "step": 3709 }, { "epoch": 0.0371, "grad_norm": 0.43081117839287136, "learning_rate": 0.003, "loss": 4.1728, "step": 3710 }, { "epoch": 0.03711, "grad_norm": 0.46229609767440477, "learning_rate": 0.003, "loss": 4.1425, "step": 3711 }, { "epoch": 0.03712, "grad_norm": 0.4820292193047058, "learning_rate": 0.003, "loss": 4.1421, "step": 3712 }, { "epoch": 0.03713, "grad_norm": 0.5404513625982291, "learning_rate": 0.003, "loss": 4.1328, "step": 3713 }, { "epoch": 0.03714, "grad_norm": 0.6436389631284867, "learning_rate": 0.003, "loss": 4.1474, "step": 3714 }, { "epoch": 0.03715, "grad_norm": 0.8290141406987748, "learning_rate": 0.003, "loss": 4.1603, "step": 3715 }, { "epoch": 0.03716, "grad_norm": 0.9048610214356393, "learning_rate": 0.003, "loss": 4.1556, "step": 3716 }, { "epoch": 0.03717, "grad_norm": 0.7058124491704675, "learning_rate": 0.003, "loss": 4.1532, "step": 3717 }, { "epoch": 0.03718, "grad_norm": 0.4918162555153287, "learning_rate": 0.003, "loss": 4.117, "step": 3718 }, { "epoch": 0.03719, "grad_norm": 0.6622934600969125, "learning_rate": 0.003, "loss": 4.1532, "step": 3719 }, { "epoch": 0.0372, "grad_norm": 0.7298879766213411, "learning_rate": 0.003, "loss": 4.1559, "step": 3720 }, { "epoch": 0.03721, "grad_norm": 0.7368836467114779, "learning_rate": 0.003, "loss": 4.1451, "step": 3721 }, { "epoch": 0.03722, "grad_norm": 0.6530001576353514, "learning_rate": 0.003, "loss": 4.1484, "step": 3722 }, { "epoch": 0.03723, "grad_norm": 0.6828551463380831, "learning_rate": 0.003, "loss": 4.1778, "step": 3723 }, { "epoch": 0.03724, "grad_norm": 0.8098638547098356, "learning_rate": 0.003, "loss": 4.1595, "step": 3724 }, { "epoch": 0.03725, "grad_norm": 0.787968577179379, "learning_rate": 0.003, "loss": 4.1709, "step": 3725 }, { "epoch": 0.03726, "grad_norm": 0.790721512863178, "learning_rate": 0.003, "loss": 4.1451, "step": 3726 }, { "epoch": 0.03727, "grad_norm": 0.7875255929753493, "learning_rate": 0.003, "loss": 4.1842, "step": 3727 }, { "epoch": 0.03728, "grad_norm": 0.7766724710074185, "learning_rate": 0.003, "loss": 4.1313, "step": 3728 }, { "epoch": 0.03729, "grad_norm": 0.7446811620432703, "learning_rate": 0.003, "loss": 4.1678, "step": 3729 }, { "epoch": 0.0373, "grad_norm": 0.7631969023181137, "learning_rate": 0.003, "loss": 4.1535, "step": 3730 }, { "epoch": 0.03731, "grad_norm": 0.8301810283690936, "learning_rate": 0.003, "loss": 4.1798, "step": 3731 }, { "epoch": 0.03732, "grad_norm": 0.795728492246486, "learning_rate": 0.003, "loss": 4.1797, "step": 3732 }, { "epoch": 0.03733, "grad_norm": 0.6502399531894205, "learning_rate": 0.003, "loss": 4.152, "step": 3733 }, { "epoch": 0.03734, "grad_norm": 0.6112985055672381, "learning_rate": 0.003, "loss": 4.1521, "step": 3734 }, { "epoch": 0.03735, "grad_norm": 0.7852832594186502, "learning_rate": 0.003, "loss": 4.1517, "step": 3735 }, { "epoch": 0.03736, "grad_norm": 0.9467543719299969, "learning_rate": 0.003, "loss": 4.1902, "step": 3736 }, { "epoch": 0.03737, "grad_norm": 0.9659184343775771, "learning_rate": 0.003, "loss": 4.1847, "step": 3737 }, { "epoch": 0.03738, "grad_norm": 0.7573591347533927, "learning_rate": 0.003, "loss": 4.1903, "step": 3738 }, { "epoch": 0.03739, "grad_norm": 0.7992106244448375, "learning_rate": 0.003, "loss": 4.1561, "step": 3739 }, { "epoch": 0.0374, "grad_norm": 0.7486649819121083, "learning_rate": 0.003, "loss": 4.1738, "step": 3740 }, { "epoch": 0.03741, "grad_norm": 0.5976608138563914, "learning_rate": 0.003, "loss": 4.1544, "step": 3741 }, { "epoch": 0.03742, "grad_norm": 0.5995036064200935, "learning_rate": 0.003, "loss": 4.1461, "step": 3742 }, { "epoch": 0.03743, "grad_norm": 0.6901682901857126, "learning_rate": 0.003, "loss": 4.1447, "step": 3743 }, { "epoch": 0.03744, "grad_norm": 0.6080608979861319, "learning_rate": 0.003, "loss": 4.1734, "step": 3744 }, { "epoch": 0.03745, "grad_norm": 0.6576923197541351, "learning_rate": 0.003, "loss": 4.1593, "step": 3745 }, { "epoch": 0.03746, "grad_norm": 0.6727287733203032, "learning_rate": 0.003, "loss": 4.1675, "step": 3746 }, { "epoch": 0.03747, "grad_norm": 0.7572729409130596, "learning_rate": 0.003, "loss": 4.175, "step": 3747 }, { "epoch": 0.03748, "grad_norm": 0.7027720695663551, "learning_rate": 0.003, "loss": 4.1438, "step": 3748 }, { "epoch": 0.03749, "grad_norm": 0.7669711680997199, "learning_rate": 0.003, "loss": 4.1588, "step": 3749 }, { "epoch": 0.0375, "grad_norm": 0.7408458574924264, "learning_rate": 0.003, "loss": 4.1786, "step": 3750 }, { "epoch": 0.03751, "grad_norm": 0.6282480274827662, "learning_rate": 0.003, "loss": 4.1276, "step": 3751 }, { "epoch": 0.03752, "grad_norm": 0.6086039975418626, "learning_rate": 0.003, "loss": 4.1587, "step": 3752 }, { "epoch": 0.03753, "grad_norm": 0.6045335662772724, "learning_rate": 0.003, "loss": 4.1601, "step": 3753 }, { "epoch": 0.03754, "grad_norm": 0.6756444191450836, "learning_rate": 0.003, "loss": 4.172, "step": 3754 }, { "epoch": 0.03755, "grad_norm": 0.6814576187053439, "learning_rate": 0.003, "loss": 4.1638, "step": 3755 }, { "epoch": 0.03756, "grad_norm": 0.6979671812601237, "learning_rate": 0.003, "loss": 4.1331, "step": 3756 }, { "epoch": 0.03757, "grad_norm": 0.7762754751971193, "learning_rate": 0.003, "loss": 4.1703, "step": 3757 }, { "epoch": 0.03758, "grad_norm": 0.9320275397343457, "learning_rate": 0.003, "loss": 4.1158, "step": 3758 }, { "epoch": 0.03759, "grad_norm": 1.029430729208351, "learning_rate": 0.003, "loss": 4.1823, "step": 3759 }, { "epoch": 0.0376, "grad_norm": 0.6884103372684509, "learning_rate": 0.003, "loss": 4.1603, "step": 3760 }, { "epoch": 0.03761, "grad_norm": 0.5726368511742514, "learning_rate": 0.003, "loss": 4.1248, "step": 3761 }, { "epoch": 0.03762, "grad_norm": 0.7431155489308362, "learning_rate": 0.003, "loss": 4.1857, "step": 3762 }, { "epoch": 0.03763, "grad_norm": 0.7334829914103421, "learning_rate": 0.003, "loss": 4.1677, "step": 3763 }, { "epoch": 0.03764, "grad_norm": 0.7755402436276377, "learning_rate": 0.003, "loss": 4.182, "step": 3764 }, { "epoch": 0.03765, "grad_norm": 0.8071077893148694, "learning_rate": 0.003, "loss": 4.2095, "step": 3765 }, { "epoch": 0.03766, "grad_norm": 0.8062094613718822, "learning_rate": 0.003, "loss": 4.1459, "step": 3766 }, { "epoch": 0.03767, "grad_norm": 0.7450385148464626, "learning_rate": 0.003, "loss": 4.1846, "step": 3767 }, { "epoch": 0.03768, "grad_norm": 0.8501525775153304, "learning_rate": 0.003, "loss": 4.1688, "step": 3768 }, { "epoch": 0.03769, "grad_norm": 0.8490270851264778, "learning_rate": 0.003, "loss": 4.1598, "step": 3769 }, { "epoch": 0.0377, "grad_norm": 0.6528754246262384, "learning_rate": 0.003, "loss": 4.1366, "step": 3770 }, { "epoch": 0.03771, "grad_norm": 0.514456285674655, "learning_rate": 0.003, "loss": 4.1472, "step": 3771 }, { "epoch": 0.03772, "grad_norm": 0.5778069788565984, "learning_rate": 0.003, "loss": 4.1704, "step": 3772 }, { "epoch": 0.03773, "grad_norm": 0.590491520248524, "learning_rate": 0.003, "loss": 4.1726, "step": 3773 }, { "epoch": 0.03774, "grad_norm": 0.6156498757993644, "learning_rate": 0.003, "loss": 4.16, "step": 3774 }, { "epoch": 0.03775, "grad_norm": 0.5373760491110208, "learning_rate": 0.003, "loss": 4.1714, "step": 3775 }, { "epoch": 0.03776, "grad_norm": 0.58872450634308, "learning_rate": 0.003, "loss": 4.1508, "step": 3776 }, { "epoch": 0.03777, "grad_norm": 0.57682380030575, "learning_rate": 0.003, "loss": 4.1632, "step": 3777 }, { "epoch": 0.03778, "grad_norm": 0.5253188081583163, "learning_rate": 0.003, "loss": 4.1383, "step": 3778 }, { "epoch": 0.03779, "grad_norm": 0.47209668216344913, "learning_rate": 0.003, "loss": 4.1517, "step": 3779 }, { "epoch": 0.0378, "grad_norm": 0.5332773200600602, "learning_rate": 0.003, "loss": 4.1514, "step": 3780 }, { "epoch": 0.03781, "grad_norm": 0.6825162371448047, "learning_rate": 0.003, "loss": 4.1838, "step": 3781 }, { "epoch": 0.03782, "grad_norm": 0.8418563836091014, "learning_rate": 0.003, "loss": 4.1608, "step": 3782 }, { "epoch": 0.03783, "grad_norm": 0.8512086413504435, "learning_rate": 0.003, "loss": 4.1648, "step": 3783 }, { "epoch": 0.03784, "grad_norm": 0.6671577794115535, "learning_rate": 0.003, "loss": 4.1611, "step": 3784 }, { "epoch": 0.03785, "grad_norm": 0.5730225846531266, "learning_rate": 0.003, "loss": 4.1395, "step": 3785 }, { "epoch": 0.03786, "grad_norm": 0.6939236048367627, "learning_rate": 0.003, "loss": 4.1621, "step": 3786 }, { "epoch": 0.03787, "grad_norm": 0.7368644792806699, "learning_rate": 0.003, "loss": 4.1509, "step": 3787 }, { "epoch": 0.03788, "grad_norm": 0.6874505246320723, "learning_rate": 0.003, "loss": 4.1821, "step": 3788 }, { "epoch": 0.03789, "grad_norm": 0.6323049623986694, "learning_rate": 0.003, "loss": 4.1126, "step": 3789 }, { "epoch": 0.0379, "grad_norm": 0.6673306922274641, "learning_rate": 0.003, "loss": 4.1402, "step": 3790 }, { "epoch": 0.03791, "grad_norm": 0.6943632593992325, "learning_rate": 0.003, "loss": 4.1536, "step": 3791 }, { "epoch": 0.03792, "grad_norm": 0.6791072764782452, "learning_rate": 0.003, "loss": 4.1837, "step": 3792 }, { "epoch": 0.03793, "grad_norm": 0.6789029894480806, "learning_rate": 0.003, "loss": 4.1784, "step": 3793 }, { "epoch": 0.03794, "grad_norm": 0.7369905694175998, "learning_rate": 0.003, "loss": 4.181, "step": 3794 }, { "epoch": 0.03795, "grad_norm": 0.7808489227373923, "learning_rate": 0.003, "loss": 4.1682, "step": 3795 }, { "epoch": 0.03796, "grad_norm": 0.8037760915310143, "learning_rate": 0.003, "loss": 4.1535, "step": 3796 }, { "epoch": 0.03797, "grad_norm": 0.7083927011043929, "learning_rate": 0.003, "loss": 4.1407, "step": 3797 }, { "epoch": 0.03798, "grad_norm": 0.706481627859496, "learning_rate": 0.003, "loss": 4.1545, "step": 3798 }, { "epoch": 0.03799, "grad_norm": 0.6536139692203335, "learning_rate": 0.003, "loss": 4.1627, "step": 3799 }, { "epoch": 0.038, "grad_norm": 0.667914966075993, "learning_rate": 0.003, "loss": 4.1697, "step": 3800 }, { "epoch": 0.03801, "grad_norm": 0.6933040831212794, "learning_rate": 0.003, "loss": 4.1758, "step": 3801 }, { "epoch": 0.03802, "grad_norm": 0.6580821376084047, "learning_rate": 0.003, "loss": 4.1491, "step": 3802 }, { "epoch": 0.03803, "grad_norm": 0.7779130392910976, "learning_rate": 0.003, "loss": 4.1782, "step": 3803 }, { "epoch": 0.03804, "grad_norm": 0.8777193399244893, "learning_rate": 0.003, "loss": 4.1529, "step": 3804 }, { "epoch": 0.03805, "grad_norm": 1.0020099764204873, "learning_rate": 0.003, "loss": 4.1747, "step": 3805 }, { "epoch": 0.03806, "grad_norm": 0.9634820397091458, "learning_rate": 0.003, "loss": 4.1743, "step": 3806 }, { "epoch": 0.03807, "grad_norm": 0.7477715351876884, "learning_rate": 0.003, "loss": 4.161, "step": 3807 }, { "epoch": 0.03808, "grad_norm": 0.6392070927979916, "learning_rate": 0.003, "loss": 4.1744, "step": 3808 }, { "epoch": 0.03809, "grad_norm": 0.6899923101840185, "learning_rate": 0.003, "loss": 4.1548, "step": 3809 }, { "epoch": 0.0381, "grad_norm": 0.6421320844146812, "learning_rate": 0.003, "loss": 4.1399, "step": 3810 }, { "epoch": 0.03811, "grad_norm": 0.5707513677972008, "learning_rate": 0.003, "loss": 4.1678, "step": 3811 }, { "epoch": 0.03812, "grad_norm": 0.6104281618032581, "learning_rate": 0.003, "loss": 4.1598, "step": 3812 }, { "epoch": 0.03813, "grad_norm": 0.7409188794471754, "learning_rate": 0.003, "loss": 4.1356, "step": 3813 }, { "epoch": 0.03814, "grad_norm": 0.9399869172219258, "learning_rate": 0.003, "loss": 4.175, "step": 3814 }, { "epoch": 0.03815, "grad_norm": 1.0585281835423583, "learning_rate": 0.003, "loss": 4.1727, "step": 3815 }, { "epoch": 0.03816, "grad_norm": 0.9799276300500098, "learning_rate": 0.003, "loss": 4.1748, "step": 3816 }, { "epoch": 0.03817, "grad_norm": 0.9941910477954538, "learning_rate": 0.003, "loss": 4.1578, "step": 3817 }, { "epoch": 0.03818, "grad_norm": 0.9313720636167057, "learning_rate": 0.003, "loss": 4.2098, "step": 3818 }, { "epoch": 0.03819, "grad_norm": 0.8314238810075018, "learning_rate": 0.003, "loss": 4.1583, "step": 3819 }, { "epoch": 0.0382, "grad_norm": 0.7056825175881773, "learning_rate": 0.003, "loss": 4.2022, "step": 3820 }, { "epoch": 0.03821, "grad_norm": 0.6762157784042842, "learning_rate": 0.003, "loss": 4.148, "step": 3821 }, { "epoch": 0.03822, "grad_norm": 0.6810353799131027, "learning_rate": 0.003, "loss": 4.1704, "step": 3822 }, { "epoch": 0.03823, "grad_norm": 0.693219699873982, "learning_rate": 0.003, "loss": 4.1655, "step": 3823 }, { "epoch": 0.03824, "grad_norm": 0.7816987127966681, "learning_rate": 0.003, "loss": 4.1624, "step": 3824 }, { "epoch": 0.03825, "grad_norm": 0.8580364555019204, "learning_rate": 0.003, "loss": 4.1796, "step": 3825 }, { "epoch": 0.03826, "grad_norm": 0.737317401518091, "learning_rate": 0.003, "loss": 4.1636, "step": 3826 }, { "epoch": 0.03827, "grad_norm": 0.6028910295160049, "learning_rate": 0.003, "loss": 4.1852, "step": 3827 }, { "epoch": 0.03828, "grad_norm": 0.6588227313580575, "learning_rate": 0.003, "loss": 4.1624, "step": 3828 }, { "epoch": 0.03829, "grad_norm": 0.6720290422366224, "learning_rate": 0.003, "loss": 4.1669, "step": 3829 }, { "epoch": 0.0383, "grad_norm": 0.5935234244676734, "learning_rate": 0.003, "loss": 4.1537, "step": 3830 }, { "epoch": 0.03831, "grad_norm": 0.5027973321474926, "learning_rate": 0.003, "loss": 4.151, "step": 3831 }, { "epoch": 0.03832, "grad_norm": 0.4793372599756564, "learning_rate": 0.003, "loss": 4.1472, "step": 3832 }, { "epoch": 0.03833, "grad_norm": 0.46250153234520147, "learning_rate": 0.003, "loss": 4.1459, "step": 3833 }, { "epoch": 0.03834, "grad_norm": 0.5005957325640198, "learning_rate": 0.003, "loss": 4.1758, "step": 3834 }, { "epoch": 0.03835, "grad_norm": 0.5638040713097137, "learning_rate": 0.003, "loss": 4.1646, "step": 3835 }, { "epoch": 0.03836, "grad_norm": 0.7454000939697492, "learning_rate": 0.003, "loss": 4.1705, "step": 3836 }, { "epoch": 0.03837, "grad_norm": 0.84129158678745, "learning_rate": 0.003, "loss": 4.1598, "step": 3837 }, { "epoch": 0.03838, "grad_norm": 0.8657748961672681, "learning_rate": 0.003, "loss": 4.1511, "step": 3838 }, { "epoch": 0.03839, "grad_norm": 0.7593243937614058, "learning_rate": 0.003, "loss": 4.1537, "step": 3839 }, { "epoch": 0.0384, "grad_norm": 0.6950210484679211, "learning_rate": 0.003, "loss": 4.1578, "step": 3840 }, { "epoch": 0.03841, "grad_norm": 0.6626542758150601, "learning_rate": 0.003, "loss": 4.1611, "step": 3841 }, { "epoch": 0.03842, "grad_norm": 0.6047316460014022, "learning_rate": 0.003, "loss": 4.159, "step": 3842 }, { "epoch": 0.03843, "grad_norm": 0.662178693915001, "learning_rate": 0.003, "loss": 4.1461, "step": 3843 }, { "epoch": 0.03844, "grad_norm": 0.706679379810732, "learning_rate": 0.003, "loss": 4.1568, "step": 3844 }, { "epoch": 0.03845, "grad_norm": 0.7114665877564104, "learning_rate": 0.003, "loss": 4.1576, "step": 3845 }, { "epoch": 0.03846, "grad_norm": 0.6429293496562273, "learning_rate": 0.003, "loss": 4.1681, "step": 3846 }, { "epoch": 0.03847, "grad_norm": 0.6208196779861823, "learning_rate": 0.003, "loss": 4.1565, "step": 3847 }, { "epoch": 0.03848, "grad_norm": 0.5991718704769494, "learning_rate": 0.003, "loss": 4.1418, "step": 3848 }, { "epoch": 0.03849, "grad_norm": 0.6327221679341399, "learning_rate": 0.003, "loss": 4.1542, "step": 3849 }, { "epoch": 0.0385, "grad_norm": 0.6393009934280616, "learning_rate": 0.003, "loss": 4.1654, "step": 3850 }, { "epoch": 0.03851, "grad_norm": 0.7219030496541264, "learning_rate": 0.003, "loss": 4.1303, "step": 3851 }, { "epoch": 0.03852, "grad_norm": 0.741478675399483, "learning_rate": 0.003, "loss": 4.1667, "step": 3852 }, { "epoch": 0.03853, "grad_norm": 0.6467861453700433, "learning_rate": 0.003, "loss": 4.1618, "step": 3853 }, { "epoch": 0.03854, "grad_norm": 0.698278301052957, "learning_rate": 0.003, "loss": 4.1565, "step": 3854 }, { "epoch": 0.03855, "grad_norm": 0.7121845392242947, "learning_rate": 0.003, "loss": 4.1765, "step": 3855 }, { "epoch": 0.03856, "grad_norm": 0.6723670160555435, "learning_rate": 0.003, "loss": 4.1461, "step": 3856 }, { "epoch": 0.03857, "grad_norm": 0.6243401732081879, "learning_rate": 0.003, "loss": 4.1494, "step": 3857 }, { "epoch": 0.03858, "grad_norm": 0.5301294562723087, "learning_rate": 0.003, "loss": 4.1414, "step": 3858 }, { "epoch": 0.03859, "grad_norm": 0.50901761681548, "learning_rate": 0.003, "loss": 4.1651, "step": 3859 }, { "epoch": 0.0386, "grad_norm": 0.5147249879881411, "learning_rate": 0.003, "loss": 4.1236, "step": 3860 }, { "epoch": 0.03861, "grad_norm": 0.4939497869282373, "learning_rate": 0.003, "loss": 4.1638, "step": 3861 }, { "epoch": 0.03862, "grad_norm": 0.5565978694443094, "learning_rate": 0.003, "loss": 4.1425, "step": 3862 }, { "epoch": 0.03863, "grad_norm": 0.6930312131073376, "learning_rate": 0.003, "loss": 4.1403, "step": 3863 }, { "epoch": 0.03864, "grad_norm": 0.7338788764609174, "learning_rate": 0.003, "loss": 4.1481, "step": 3864 }, { "epoch": 0.03865, "grad_norm": 0.716452591015328, "learning_rate": 0.003, "loss": 4.1614, "step": 3865 }, { "epoch": 0.03866, "grad_norm": 0.8035841485064186, "learning_rate": 0.003, "loss": 4.1395, "step": 3866 }, { "epoch": 0.03867, "grad_norm": 0.9790747131837212, "learning_rate": 0.003, "loss": 4.1305, "step": 3867 }, { "epoch": 0.03868, "grad_norm": 0.9557607039135098, "learning_rate": 0.003, "loss": 4.1685, "step": 3868 }, { "epoch": 0.03869, "grad_norm": 0.8269471619194556, "learning_rate": 0.003, "loss": 4.1558, "step": 3869 }, { "epoch": 0.0387, "grad_norm": 0.736530154455655, "learning_rate": 0.003, "loss": 4.1642, "step": 3870 }, { "epoch": 0.03871, "grad_norm": 0.5662896465927079, "learning_rate": 0.003, "loss": 4.1341, "step": 3871 }, { "epoch": 0.03872, "grad_norm": 0.7064773963599414, "learning_rate": 0.003, "loss": 4.1325, "step": 3872 }, { "epoch": 0.03873, "grad_norm": 0.7511440909675522, "learning_rate": 0.003, "loss": 4.1762, "step": 3873 }, { "epoch": 0.03874, "grad_norm": 0.8087487769850815, "learning_rate": 0.003, "loss": 4.1496, "step": 3874 }, { "epoch": 0.03875, "grad_norm": 0.840451180426337, "learning_rate": 0.003, "loss": 4.1553, "step": 3875 }, { "epoch": 0.03876, "grad_norm": 0.8711005630005544, "learning_rate": 0.003, "loss": 4.1888, "step": 3876 }, { "epoch": 0.03877, "grad_norm": 0.7489732856457578, "learning_rate": 0.003, "loss": 4.1645, "step": 3877 }, { "epoch": 0.03878, "grad_norm": 0.6479494485701219, "learning_rate": 0.003, "loss": 4.1753, "step": 3878 }, { "epoch": 0.03879, "grad_norm": 0.5974171547568563, "learning_rate": 0.003, "loss": 4.1755, "step": 3879 }, { "epoch": 0.0388, "grad_norm": 0.6504260745071084, "learning_rate": 0.003, "loss": 4.141, "step": 3880 }, { "epoch": 0.03881, "grad_norm": 0.6733083067264833, "learning_rate": 0.003, "loss": 4.1685, "step": 3881 }, { "epoch": 0.03882, "grad_norm": 0.660030960264416, "learning_rate": 0.003, "loss": 4.1768, "step": 3882 }, { "epoch": 0.03883, "grad_norm": 0.7215073977898953, "learning_rate": 0.003, "loss": 4.1491, "step": 3883 }, { "epoch": 0.03884, "grad_norm": 0.6784371169912143, "learning_rate": 0.003, "loss": 4.1572, "step": 3884 }, { "epoch": 0.03885, "grad_norm": 0.6961417852606488, "learning_rate": 0.003, "loss": 4.1747, "step": 3885 }, { "epoch": 0.03886, "grad_norm": 0.6846762234353307, "learning_rate": 0.003, "loss": 4.1479, "step": 3886 }, { "epoch": 0.03887, "grad_norm": 0.6018705177852628, "learning_rate": 0.003, "loss": 4.1575, "step": 3887 }, { "epoch": 0.03888, "grad_norm": 0.7545299954550155, "learning_rate": 0.003, "loss": 4.138, "step": 3888 }, { "epoch": 0.03889, "grad_norm": 0.9698514203515268, "learning_rate": 0.003, "loss": 4.1627, "step": 3889 }, { "epoch": 0.0389, "grad_norm": 1.1649616934398974, "learning_rate": 0.003, "loss": 4.1644, "step": 3890 }, { "epoch": 0.03891, "grad_norm": 0.7396396017755505, "learning_rate": 0.003, "loss": 4.1579, "step": 3891 }, { "epoch": 0.03892, "grad_norm": 0.7127971514741257, "learning_rate": 0.003, "loss": 4.1754, "step": 3892 }, { "epoch": 0.03893, "grad_norm": 0.8147903386909522, "learning_rate": 0.003, "loss": 4.1498, "step": 3893 }, { "epoch": 0.03894, "grad_norm": 0.8553199932435573, "learning_rate": 0.003, "loss": 4.1639, "step": 3894 }, { "epoch": 0.03895, "grad_norm": 0.8064783210870279, "learning_rate": 0.003, "loss": 4.1385, "step": 3895 }, { "epoch": 0.03896, "grad_norm": 0.7327396202925885, "learning_rate": 0.003, "loss": 4.1635, "step": 3896 }, { "epoch": 0.03897, "grad_norm": 0.6832165978250844, "learning_rate": 0.003, "loss": 4.1362, "step": 3897 }, { "epoch": 0.03898, "grad_norm": 0.7381820210282796, "learning_rate": 0.003, "loss": 4.1722, "step": 3898 }, { "epoch": 0.03899, "grad_norm": 0.7120798145498066, "learning_rate": 0.003, "loss": 4.163, "step": 3899 }, { "epoch": 0.039, "grad_norm": 0.706935890187933, "learning_rate": 0.003, "loss": 4.1611, "step": 3900 }, { "epoch": 0.03901, "grad_norm": 0.7172271819943744, "learning_rate": 0.003, "loss": 4.1754, "step": 3901 }, { "epoch": 0.03902, "grad_norm": 0.6436464145708447, "learning_rate": 0.003, "loss": 4.1447, "step": 3902 }, { "epoch": 0.03903, "grad_norm": 0.7120130287904197, "learning_rate": 0.003, "loss": 4.1839, "step": 3903 }, { "epoch": 0.03904, "grad_norm": 0.7363979573377551, "learning_rate": 0.003, "loss": 4.1613, "step": 3904 }, { "epoch": 0.03905, "grad_norm": 0.6989523475801707, "learning_rate": 0.003, "loss": 4.1777, "step": 3905 }, { "epoch": 0.03906, "grad_norm": 0.7664511025672687, "learning_rate": 0.003, "loss": 4.1524, "step": 3906 }, { "epoch": 0.03907, "grad_norm": 0.6973849941812954, "learning_rate": 0.003, "loss": 4.1633, "step": 3907 }, { "epoch": 0.03908, "grad_norm": 0.6968835116178974, "learning_rate": 0.003, "loss": 4.1467, "step": 3908 }, { "epoch": 0.03909, "grad_norm": 0.6214766339187717, "learning_rate": 0.003, "loss": 4.1479, "step": 3909 }, { "epoch": 0.0391, "grad_norm": 0.606928879147604, "learning_rate": 0.003, "loss": 4.1783, "step": 3910 }, { "epoch": 0.03911, "grad_norm": 0.6150917723982117, "learning_rate": 0.003, "loss": 4.1618, "step": 3911 }, { "epoch": 0.03912, "grad_norm": 0.5315811614956929, "learning_rate": 0.003, "loss": 4.1576, "step": 3912 }, { "epoch": 0.03913, "grad_norm": 0.49513392829810154, "learning_rate": 0.003, "loss": 4.1298, "step": 3913 }, { "epoch": 0.03914, "grad_norm": 0.5316802449083329, "learning_rate": 0.003, "loss": 4.1655, "step": 3914 }, { "epoch": 0.03915, "grad_norm": 0.5092855956655066, "learning_rate": 0.003, "loss": 4.1352, "step": 3915 }, { "epoch": 0.03916, "grad_norm": 0.45924359376251017, "learning_rate": 0.003, "loss": 4.1598, "step": 3916 }, { "epoch": 0.03917, "grad_norm": 0.45484869852430065, "learning_rate": 0.003, "loss": 4.1506, "step": 3917 }, { "epoch": 0.03918, "grad_norm": 0.4501493787308128, "learning_rate": 0.003, "loss": 4.1358, "step": 3918 }, { "epoch": 0.03919, "grad_norm": 0.5428853651303225, "learning_rate": 0.003, "loss": 4.1072, "step": 3919 }, { "epoch": 0.0392, "grad_norm": 0.7284545482558801, "learning_rate": 0.003, "loss": 4.1372, "step": 3920 }, { "epoch": 0.03921, "grad_norm": 0.9552348159276692, "learning_rate": 0.003, "loss": 4.1389, "step": 3921 }, { "epoch": 0.03922, "grad_norm": 1.132432474415542, "learning_rate": 0.003, "loss": 4.1914, "step": 3922 }, { "epoch": 0.03923, "grad_norm": 0.6932654094565733, "learning_rate": 0.003, "loss": 4.1356, "step": 3923 }, { "epoch": 0.03924, "grad_norm": 0.6532999077661216, "learning_rate": 0.003, "loss": 4.1431, "step": 3924 }, { "epoch": 0.03925, "grad_norm": 0.9135572051042363, "learning_rate": 0.003, "loss": 4.155, "step": 3925 }, { "epoch": 0.03926, "grad_norm": 0.9923603322118285, "learning_rate": 0.003, "loss": 4.1508, "step": 3926 }, { "epoch": 0.03927, "grad_norm": 0.9789133497360833, "learning_rate": 0.003, "loss": 4.168, "step": 3927 }, { "epoch": 0.03928, "grad_norm": 0.8942775586441536, "learning_rate": 0.003, "loss": 4.1834, "step": 3928 }, { "epoch": 0.03929, "grad_norm": 1.0319586305334827, "learning_rate": 0.003, "loss": 4.2001, "step": 3929 }, { "epoch": 0.0393, "grad_norm": 1.0750467044406122, "learning_rate": 0.003, "loss": 4.1904, "step": 3930 }, { "epoch": 0.03931, "grad_norm": 0.9184595530968939, "learning_rate": 0.003, "loss": 4.1919, "step": 3931 }, { "epoch": 0.03932, "grad_norm": 0.7648894274462913, "learning_rate": 0.003, "loss": 4.1855, "step": 3932 }, { "epoch": 0.03933, "grad_norm": 0.724112317823659, "learning_rate": 0.003, "loss": 4.172, "step": 3933 }, { "epoch": 0.03934, "grad_norm": 0.6449540845441923, "learning_rate": 0.003, "loss": 4.1627, "step": 3934 }, { "epoch": 0.03935, "grad_norm": 0.7490619555451586, "learning_rate": 0.003, "loss": 4.1618, "step": 3935 }, { "epoch": 0.03936, "grad_norm": 0.7177217614636706, "learning_rate": 0.003, "loss": 4.1226, "step": 3936 }, { "epoch": 0.03937, "grad_norm": 0.653893710648041, "learning_rate": 0.003, "loss": 4.1584, "step": 3937 }, { "epoch": 0.03938, "grad_norm": 0.689030208208419, "learning_rate": 0.003, "loss": 4.1605, "step": 3938 }, { "epoch": 0.03939, "grad_norm": 0.7098991679111472, "learning_rate": 0.003, "loss": 4.1446, "step": 3939 }, { "epoch": 0.0394, "grad_norm": 0.6955232506989616, "learning_rate": 0.003, "loss": 4.1709, "step": 3940 }, { "epoch": 0.03941, "grad_norm": 0.6598355292317952, "learning_rate": 0.003, "loss": 4.1607, "step": 3941 }, { "epoch": 0.03942, "grad_norm": 0.738032728669511, "learning_rate": 0.003, "loss": 4.1785, "step": 3942 }, { "epoch": 0.03943, "grad_norm": 0.8081766812397826, "learning_rate": 0.003, "loss": 4.1681, "step": 3943 }, { "epoch": 0.03944, "grad_norm": 0.8593029564204311, "learning_rate": 0.003, "loss": 4.1857, "step": 3944 }, { "epoch": 0.03945, "grad_norm": 0.8814910808518954, "learning_rate": 0.003, "loss": 4.1561, "step": 3945 }, { "epoch": 0.03946, "grad_norm": 0.8816608783626361, "learning_rate": 0.003, "loss": 4.1852, "step": 3946 }, { "epoch": 0.03947, "grad_norm": 0.8064135711097247, "learning_rate": 0.003, "loss": 4.1834, "step": 3947 }, { "epoch": 0.03948, "grad_norm": 0.8804914835705606, "learning_rate": 0.003, "loss": 4.2055, "step": 3948 }, { "epoch": 0.03949, "grad_norm": 0.8976280856029432, "learning_rate": 0.003, "loss": 4.1479, "step": 3949 }, { "epoch": 0.0395, "grad_norm": 0.7915245308890937, "learning_rate": 0.003, "loss": 4.1755, "step": 3950 }, { "epoch": 0.03951, "grad_norm": 0.7023597435152198, "learning_rate": 0.003, "loss": 4.2015, "step": 3951 }, { "epoch": 0.03952, "grad_norm": 0.5970926719547551, "learning_rate": 0.003, "loss": 4.1776, "step": 3952 }, { "epoch": 0.03953, "grad_norm": 0.5127193759525883, "learning_rate": 0.003, "loss": 4.1524, "step": 3953 }, { "epoch": 0.03954, "grad_norm": 0.5083765022250437, "learning_rate": 0.003, "loss": 4.1525, "step": 3954 }, { "epoch": 0.03955, "grad_norm": 0.5581234697293194, "learning_rate": 0.003, "loss": 4.1532, "step": 3955 }, { "epoch": 0.03956, "grad_norm": 0.5875448440882535, "learning_rate": 0.003, "loss": 4.1566, "step": 3956 }, { "epoch": 0.03957, "grad_norm": 0.597994191226493, "learning_rate": 0.003, "loss": 4.1624, "step": 3957 }, { "epoch": 0.03958, "grad_norm": 0.5717890299511298, "learning_rate": 0.003, "loss": 4.1378, "step": 3958 }, { "epoch": 0.03959, "grad_norm": 0.5436034530009164, "learning_rate": 0.003, "loss": 4.1361, "step": 3959 }, { "epoch": 0.0396, "grad_norm": 0.49029266691650636, "learning_rate": 0.003, "loss": 4.1519, "step": 3960 }, { "epoch": 0.03961, "grad_norm": 0.517401344634264, "learning_rate": 0.003, "loss": 4.1482, "step": 3961 }, { "epoch": 0.03962, "grad_norm": 0.5330594146947688, "learning_rate": 0.003, "loss": 4.1598, "step": 3962 }, { "epoch": 0.03963, "grad_norm": 0.5905667821025009, "learning_rate": 0.003, "loss": 4.1463, "step": 3963 }, { "epoch": 0.03964, "grad_norm": 0.558494601973704, "learning_rate": 0.003, "loss": 4.1469, "step": 3964 }, { "epoch": 0.03965, "grad_norm": 0.5647463812091732, "learning_rate": 0.003, "loss": 4.1433, "step": 3965 }, { "epoch": 0.03966, "grad_norm": 0.5672438671895076, "learning_rate": 0.003, "loss": 4.1833, "step": 3966 }, { "epoch": 0.03967, "grad_norm": 0.5990092369456635, "learning_rate": 0.003, "loss": 4.1392, "step": 3967 }, { "epoch": 0.03968, "grad_norm": 0.5306380158511645, "learning_rate": 0.003, "loss": 4.1653, "step": 3968 }, { "epoch": 0.03969, "grad_norm": 0.5335941140441545, "learning_rate": 0.003, "loss": 4.1448, "step": 3969 }, { "epoch": 0.0397, "grad_norm": 0.5101263285016783, "learning_rate": 0.003, "loss": 4.1497, "step": 3970 }, { "epoch": 0.03971, "grad_norm": 0.4861456325644423, "learning_rate": 0.003, "loss": 4.1253, "step": 3971 }, { "epoch": 0.03972, "grad_norm": 0.5082221340172262, "learning_rate": 0.003, "loss": 4.1461, "step": 3972 }, { "epoch": 0.03973, "grad_norm": 0.6064259836747726, "learning_rate": 0.003, "loss": 4.1523, "step": 3973 }, { "epoch": 0.03974, "grad_norm": 0.8810228923861501, "learning_rate": 0.003, "loss": 4.1552, "step": 3974 }, { "epoch": 0.03975, "grad_norm": 1.1594440991062704, "learning_rate": 0.003, "loss": 4.1581, "step": 3975 }, { "epoch": 0.03976, "grad_norm": 0.6242516132147851, "learning_rate": 0.003, "loss": 4.1795, "step": 3976 }, { "epoch": 0.03977, "grad_norm": 0.6310756758939645, "learning_rate": 0.003, "loss": 4.1651, "step": 3977 }, { "epoch": 0.03978, "grad_norm": 0.9800843159523053, "learning_rate": 0.003, "loss": 4.1417, "step": 3978 }, { "epoch": 0.03979, "grad_norm": 0.8894332385272683, "learning_rate": 0.003, "loss": 4.1646, "step": 3979 }, { "epoch": 0.0398, "grad_norm": 0.5906456865918563, "learning_rate": 0.003, "loss": 4.1475, "step": 3980 }, { "epoch": 0.03981, "grad_norm": 0.6582973485295706, "learning_rate": 0.003, "loss": 4.1444, "step": 3981 }, { "epoch": 0.03982, "grad_norm": 0.7206583498121457, "learning_rate": 0.003, "loss": 4.1283, "step": 3982 }, { "epoch": 0.03983, "grad_norm": 0.8701732888100308, "learning_rate": 0.003, "loss": 4.155, "step": 3983 }, { "epoch": 0.03984, "grad_norm": 0.8661641192026968, "learning_rate": 0.003, "loss": 4.1668, "step": 3984 }, { "epoch": 0.03985, "grad_norm": 0.9243378871553372, "learning_rate": 0.003, "loss": 4.1681, "step": 3985 }, { "epoch": 0.03986, "grad_norm": 0.9956369167150667, "learning_rate": 0.003, "loss": 4.1705, "step": 3986 }, { "epoch": 0.03987, "grad_norm": 0.8887499652438708, "learning_rate": 0.003, "loss": 4.1771, "step": 3987 }, { "epoch": 0.03988, "grad_norm": 0.7725996597052387, "learning_rate": 0.003, "loss": 4.1764, "step": 3988 }, { "epoch": 0.03989, "grad_norm": 0.9540752093113514, "learning_rate": 0.003, "loss": 4.1732, "step": 3989 }, { "epoch": 0.0399, "grad_norm": 0.9866842358730207, "learning_rate": 0.003, "loss": 4.1507, "step": 3990 }, { "epoch": 0.03991, "grad_norm": 1.0032653576960733, "learning_rate": 0.003, "loss": 4.1979, "step": 3991 }, { "epoch": 0.03992, "grad_norm": 0.9799832060702593, "learning_rate": 0.003, "loss": 4.1809, "step": 3992 }, { "epoch": 0.03993, "grad_norm": 0.9143397893823952, "learning_rate": 0.003, "loss": 4.1772, "step": 3993 }, { "epoch": 0.03994, "grad_norm": 0.8867228330129578, "learning_rate": 0.003, "loss": 4.1785, "step": 3994 }, { "epoch": 0.03995, "grad_norm": 0.9777127443180489, "learning_rate": 0.003, "loss": 4.1672, "step": 3995 }, { "epoch": 0.03996, "grad_norm": 1.121916971358567, "learning_rate": 0.003, "loss": 4.2091, "step": 3996 }, { "epoch": 0.03997, "grad_norm": 1.0260835587044206, "learning_rate": 0.003, "loss": 4.21, "step": 3997 }, { "epoch": 0.03998, "grad_norm": 0.9733483073411772, "learning_rate": 0.003, "loss": 4.2091, "step": 3998 }, { "epoch": 0.03999, "grad_norm": 0.9253787235141333, "learning_rate": 0.003, "loss": 4.2098, "step": 3999 }, { "epoch": 0.04, "grad_norm": 0.860383401913097, "learning_rate": 0.003, "loss": 4.2101, "step": 4000 }, { "epoch": 0.04001, "grad_norm": 0.8527166028367188, "learning_rate": 0.003, "loss": 4.2089, "step": 4001 }, { "epoch": 0.04002, "grad_norm": 0.7955822227821913, "learning_rate": 0.003, "loss": 4.1762, "step": 4002 }, { "epoch": 0.04003, "grad_norm": 0.8159059659987256, "learning_rate": 0.003, "loss": 4.1966, "step": 4003 }, { "epoch": 0.04004, "grad_norm": 0.8242608605578613, "learning_rate": 0.003, "loss": 4.2014, "step": 4004 }, { "epoch": 0.04005, "grad_norm": 0.7442104756929572, "learning_rate": 0.003, "loss": 4.2052, "step": 4005 }, { "epoch": 0.04006, "grad_norm": 0.6862974810950646, "learning_rate": 0.003, "loss": 4.1579, "step": 4006 }, { "epoch": 0.04007, "grad_norm": 0.6713665895022044, "learning_rate": 0.003, "loss": 4.1851, "step": 4007 }, { "epoch": 0.04008, "grad_norm": 0.6515792038366235, "learning_rate": 0.003, "loss": 4.1589, "step": 4008 }, { "epoch": 0.04009, "grad_norm": 0.6670829454786272, "learning_rate": 0.003, "loss": 4.1561, "step": 4009 }, { "epoch": 0.0401, "grad_norm": 0.5890722285246915, "learning_rate": 0.003, "loss": 4.1713, "step": 4010 }, { "epoch": 0.04011, "grad_norm": 0.5154042218197626, "learning_rate": 0.003, "loss": 4.1727, "step": 4011 }, { "epoch": 0.04012, "grad_norm": 0.511904267386738, "learning_rate": 0.003, "loss": 4.1666, "step": 4012 }, { "epoch": 0.04013, "grad_norm": 0.4301130431551674, "learning_rate": 0.003, "loss": 4.1595, "step": 4013 }, { "epoch": 0.04014, "grad_norm": 0.4755384019802336, "learning_rate": 0.003, "loss": 4.145, "step": 4014 }, { "epoch": 0.04015, "grad_norm": 0.4526642613482027, "learning_rate": 0.003, "loss": 4.1259, "step": 4015 }, { "epoch": 0.04016, "grad_norm": 0.5025871602100715, "learning_rate": 0.003, "loss": 4.1428, "step": 4016 }, { "epoch": 0.04017, "grad_norm": 0.613279269884007, "learning_rate": 0.003, "loss": 4.1847, "step": 4017 }, { "epoch": 0.04018, "grad_norm": 0.8281359691209577, "learning_rate": 0.003, "loss": 4.1594, "step": 4018 }, { "epoch": 0.04019, "grad_norm": 0.941035832708172, "learning_rate": 0.003, "loss": 4.1434, "step": 4019 }, { "epoch": 0.0402, "grad_norm": 0.8795192610150039, "learning_rate": 0.003, "loss": 4.1785, "step": 4020 }, { "epoch": 0.04021, "grad_norm": 0.6515100017542129, "learning_rate": 0.003, "loss": 4.1464, "step": 4021 }, { "epoch": 0.04022, "grad_norm": 0.5063361011739121, "learning_rate": 0.003, "loss": 4.1404, "step": 4022 }, { "epoch": 0.04023, "grad_norm": 0.6481572855287395, "learning_rate": 0.003, "loss": 4.1413, "step": 4023 }, { "epoch": 0.04024, "grad_norm": 0.7010903711503769, "learning_rate": 0.003, "loss": 4.131, "step": 4024 }, { "epoch": 0.04025, "grad_norm": 0.7840447772680712, "learning_rate": 0.003, "loss": 4.1399, "step": 4025 }, { "epoch": 0.04026, "grad_norm": 0.7341068822639932, "learning_rate": 0.003, "loss": 4.1655, "step": 4026 }, { "epoch": 0.04027, "grad_norm": 0.6238472142782437, "learning_rate": 0.003, "loss": 4.1663, "step": 4027 }, { "epoch": 0.04028, "grad_norm": 0.4935095791712522, "learning_rate": 0.003, "loss": 4.1428, "step": 4028 }, { "epoch": 0.04029, "grad_norm": 0.4905704871714714, "learning_rate": 0.003, "loss": 4.1713, "step": 4029 }, { "epoch": 0.0403, "grad_norm": 0.5059882465087885, "learning_rate": 0.003, "loss": 4.1199, "step": 4030 }, { "epoch": 0.04031, "grad_norm": 0.49185506270358387, "learning_rate": 0.003, "loss": 4.1374, "step": 4031 }, { "epoch": 0.04032, "grad_norm": 0.5505039809907691, "learning_rate": 0.003, "loss": 4.1625, "step": 4032 }, { "epoch": 0.04033, "grad_norm": 0.5349055922421693, "learning_rate": 0.003, "loss": 4.1577, "step": 4033 }, { "epoch": 0.04034, "grad_norm": 0.4887336563792077, "learning_rate": 0.003, "loss": 4.1256, "step": 4034 }, { "epoch": 0.04035, "grad_norm": 0.5026476582372887, "learning_rate": 0.003, "loss": 4.1498, "step": 4035 }, { "epoch": 0.04036, "grad_norm": 0.5741733182678186, "learning_rate": 0.003, "loss": 4.1638, "step": 4036 }, { "epoch": 0.04037, "grad_norm": 0.6338724243857007, "learning_rate": 0.003, "loss": 4.1187, "step": 4037 }, { "epoch": 0.04038, "grad_norm": 0.5866801909982847, "learning_rate": 0.003, "loss": 4.1345, "step": 4038 }, { "epoch": 0.04039, "grad_norm": 0.4469263176133214, "learning_rate": 0.003, "loss": 4.1291, "step": 4039 }, { "epoch": 0.0404, "grad_norm": 0.5203352096732194, "learning_rate": 0.003, "loss": 4.1298, "step": 4040 }, { "epoch": 0.04041, "grad_norm": 0.5903209349905957, "learning_rate": 0.003, "loss": 4.1276, "step": 4041 }, { "epoch": 0.04042, "grad_norm": 0.7850173938632014, "learning_rate": 0.003, "loss": 4.1729, "step": 4042 }, { "epoch": 0.04043, "grad_norm": 1.0374432013836312, "learning_rate": 0.003, "loss": 4.1727, "step": 4043 }, { "epoch": 0.04044, "grad_norm": 1.0008411828594295, "learning_rate": 0.003, "loss": 4.1662, "step": 4044 }, { "epoch": 0.04045, "grad_norm": 0.721767328260429, "learning_rate": 0.003, "loss": 4.1048, "step": 4045 }, { "epoch": 0.04046, "grad_norm": 0.6027053735164074, "learning_rate": 0.003, "loss": 4.1496, "step": 4046 }, { "epoch": 0.04047, "grad_norm": 0.8146571762133998, "learning_rate": 0.003, "loss": 4.1466, "step": 4047 }, { "epoch": 0.04048, "grad_norm": 0.8947220522958645, "learning_rate": 0.003, "loss": 4.1436, "step": 4048 }, { "epoch": 0.04049, "grad_norm": 0.9307304863945178, "learning_rate": 0.003, "loss": 4.1891, "step": 4049 }, { "epoch": 0.0405, "grad_norm": 0.7045766049752963, "learning_rate": 0.003, "loss": 4.178, "step": 4050 }, { "epoch": 0.04051, "grad_norm": 0.6846375832757631, "learning_rate": 0.003, "loss": 4.1564, "step": 4051 }, { "epoch": 0.04052, "grad_norm": 0.8615665313113385, "learning_rate": 0.003, "loss": 4.1722, "step": 4052 }, { "epoch": 0.04053, "grad_norm": 0.9373878770279569, "learning_rate": 0.003, "loss": 4.125, "step": 4053 }, { "epoch": 0.04054, "grad_norm": 0.8371593982203549, "learning_rate": 0.003, "loss": 4.1443, "step": 4054 }, { "epoch": 0.04055, "grad_norm": 0.6967664819995452, "learning_rate": 0.003, "loss": 4.138, "step": 4055 }, { "epoch": 0.04056, "grad_norm": 0.6741494833756686, "learning_rate": 0.003, "loss": 4.1595, "step": 4056 }, { "epoch": 0.04057, "grad_norm": 0.8030676782045835, "learning_rate": 0.003, "loss": 4.1632, "step": 4057 }, { "epoch": 0.04058, "grad_norm": 0.774339384731219, "learning_rate": 0.003, "loss": 4.1338, "step": 4058 }, { "epoch": 0.04059, "grad_norm": 0.8430830883517224, "learning_rate": 0.003, "loss": 4.1192, "step": 4059 }, { "epoch": 0.0406, "grad_norm": 0.6742657640683123, "learning_rate": 0.003, "loss": 4.1739, "step": 4060 }, { "epoch": 0.04061, "grad_norm": 0.6572926238853734, "learning_rate": 0.003, "loss": 4.1598, "step": 4061 }, { "epoch": 0.04062, "grad_norm": 0.5997992092489303, "learning_rate": 0.003, "loss": 4.1368, "step": 4062 }, { "epoch": 0.04063, "grad_norm": 0.5451542397429993, "learning_rate": 0.003, "loss": 4.1422, "step": 4063 }, { "epoch": 0.04064, "grad_norm": 0.5706051829517365, "learning_rate": 0.003, "loss": 4.1561, "step": 4064 }, { "epoch": 0.04065, "grad_norm": 0.6573749206271802, "learning_rate": 0.003, "loss": 4.1936, "step": 4065 }, { "epoch": 0.04066, "grad_norm": 0.6711377067040638, "learning_rate": 0.003, "loss": 4.1533, "step": 4066 }, { "epoch": 0.04067, "grad_norm": 0.6151905993857413, "learning_rate": 0.003, "loss": 4.1541, "step": 4067 }, { "epoch": 0.04068, "grad_norm": 0.610139511076282, "learning_rate": 0.003, "loss": 4.1627, "step": 4068 }, { "epoch": 0.04069, "grad_norm": 0.6435562548550586, "learning_rate": 0.003, "loss": 4.153, "step": 4069 }, { "epoch": 0.0407, "grad_norm": 0.5473010440748541, "learning_rate": 0.003, "loss": 4.1547, "step": 4070 }, { "epoch": 0.04071, "grad_norm": 0.601566675296323, "learning_rate": 0.003, "loss": 4.1223, "step": 4071 }, { "epoch": 0.04072, "grad_norm": 0.6509579069490173, "learning_rate": 0.003, "loss": 4.146, "step": 4072 }, { "epoch": 0.04073, "grad_norm": 0.5973245201602951, "learning_rate": 0.003, "loss": 4.1302, "step": 4073 }, { "epoch": 0.04074, "grad_norm": 0.607520676438575, "learning_rate": 0.003, "loss": 4.1574, "step": 4074 }, { "epoch": 0.04075, "grad_norm": 0.8288720279764606, "learning_rate": 0.003, "loss": 4.1612, "step": 4075 }, { "epoch": 0.04076, "grad_norm": 0.9232154672269041, "learning_rate": 0.003, "loss": 4.1605, "step": 4076 }, { "epoch": 0.04077, "grad_norm": 0.9604554082892389, "learning_rate": 0.003, "loss": 4.1752, "step": 4077 }, { "epoch": 0.04078, "grad_norm": 0.9176998486040583, "learning_rate": 0.003, "loss": 4.1501, "step": 4078 }, { "epoch": 0.04079, "grad_norm": 0.7781092418368329, "learning_rate": 0.003, "loss": 4.1586, "step": 4079 }, { "epoch": 0.0408, "grad_norm": 0.7144390148261879, "learning_rate": 0.003, "loss": 4.1488, "step": 4080 }, { "epoch": 0.04081, "grad_norm": 0.7985914865590669, "learning_rate": 0.003, "loss": 4.1329, "step": 4081 }, { "epoch": 0.04082, "grad_norm": 0.8262338793037239, "learning_rate": 0.003, "loss": 4.1699, "step": 4082 }, { "epoch": 0.04083, "grad_norm": 0.7465376916541703, "learning_rate": 0.003, "loss": 4.1459, "step": 4083 }, { "epoch": 0.04084, "grad_norm": 0.6728219238709465, "learning_rate": 0.003, "loss": 4.1811, "step": 4084 }, { "epoch": 0.04085, "grad_norm": 0.755821930925202, "learning_rate": 0.003, "loss": 4.163, "step": 4085 }, { "epoch": 0.04086, "grad_norm": 0.637261349390268, "learning_rate": 0.003, "loss": 4.1791, "step": 4086 }, { "epoch": 0.04087, "grad_norm": 0.5906659081612129, "learning_rate": 0.003, "loss": 4.1597, "step": 4087 }, { "epoch": 0.04088, "grad_norm": 0.547995647785304, "learning_rate": 0.003, "loss": 4.1366, "step": 4088 }, { "epoch": 0.04089, "grad_norm": 0.5708520101184565, "learning_rate": 0.003, "loss": 4.1354, "step": 4089 }, { "epoch": 0.0409, "grad_norm": 0.6099350559443698, "learning_rate": 0.003, "loss": 4.1539, "step": 4090 }, { "epoch": 0.04091, "grad_norm": 0.696520536628286, "learning_rate": 0.003, "loss": 4.1662, "step": 4091 }, { "epoch": 0.04092, "grad_norm": 0.8551793129611549, "learning_rate": 0.003, "loss": 4.129, "step": 4092 }, { "epoch": 0.04093, "grad_norm": 1.0511160284337668, "learning_rate": 0.003, "loss": 4.1606, "step": 4093 }, { "epoch": 0.04094, "grad_norm": 0.9303098870853981, "learning_rate": 0.003, "loss": 4.151, "step": 4094 }, { "epoch": 0.04095, "grad_norm": 0.7394320217083529, "learning_rate": 0.003, "loss": 4.1258, "step": 4095 }, { "epoch": 0.04096, "grad_norm": 0.7856690666821323, "learning_rate": 0.003, "loss": 4.1325, "step": 4096 }, { "epoch": 0.04097, "grad_norm": 0.7724952999564255, "learning_rate": 0.003, "loss": 4.1507, "step": 4097 }, { "epoch": 0.04098, "grad_norm": 0.6418338461235239, "learning_rate": 0.003, "loss": 4.1525, "step": 4098 }, { "epoch": 0.04099, "grad_norm": 0.6141872194967258, "learning_rate": 0.003, "loss": 4.1751, "step": 4099 }, { "epoch": 0.041, "grad_norm": 0.6337061381438701, "learning_rate": 0.003, "loss": 4.1314, "step": 4100 }, { "epoch": 0.04101, "grad_norm": 0.7005605990414404, "learning_rate": 0.003, "loss": 4.129, "step": 4101 }, { "epoch": 0.04102, "grad_norm": 0.7781518273799513, "learning_rate": 0.003, "loss": 4.151, "step": 4102 }, { "epoch": 0.04103, "grad_norm": 0.7717092605525707, "learning_rate": 0.003, "loss": 4.1769, "step": 4103 }, { "epoch": 0.04104, "grad_norm": 0.7998045960483534, "learning_rate": 0.003, "loss": 4.1621, "step": 4104 }, { "epoch": 0.04105, "grad_norm": 0.7266397106851961, "learning_rate": 0.003, "loss": 4.1275, "step": 4105 }, { "epoch": 0.04106, "grad_norm": 0.7375729801536005, "learning_rate": 0.003, "loss": 4.1665, "step": 4106 }, { "epoch": 0.04107, "grad_norm": 0.8517602436934472, "learning_rate": 0.003, "loss": 4.1672, "step": 4107 }, { "epoch": 0.04108, "grad_norm": 0.7155354553416942, "learning_rate": 0.003, "loss": 4.1487, "step": 4108 }, { "epoch": 0.04109, "grad_norm": 0.8472831379025649, "learning_rate": 0.003, "loss": 4.1412, "step": 4109 }, { "epoch": 0.0411, "grad_norm": 0.9994412956163683, "learning_rate": 0.003, "loss": 4.1607, "step": 4110 }, { "epoch": 0.04111, "grad_norm": 1.0865359108420907, "learning_rate": 0.003, "loss": 4.1499, "step": 4111 }, { "epoch": 0.04112, "grad_norm": 0.7944687116955357, "learning_rate": 0.003, "loss": 4.1621, "step": 4112 }, { "epoch": 0.04113, "grad_norm": 0.6695248184455779, "learning_rate": 0.003, "loss": 4.2009, "step": 4113 }, { "epoch": 0.04114, "grad_norm": 0.8304852025142981, "learning_rate": 0.003, "loss": 4.1456, "step": 4114 }, { "epoch": 0.04115, "grad_norm": 0.9696455727697199, "learning_rate": 0.003, "loss": 4.164, "step": 4115 }, { "epoch": 0.04116, "grad_norm": 1.0300022050612345, "learning_rate": 0.003, "loss": 4.1831, "step": 4116 }, { "epoch": 0.04117, "grad_norm": 0.7267792671268358, "learning_rate": 0.003, "loss": 4.1373, "step": 4117 }, { "epoch": 0.04118, "grad_norm": 0.6123016813019814, "learning_rate": 0.003, "loss": 4.1925, "step": 4118 }, { "epoch": 0.04119, "grad_norm": 0.6465840606735817, "learning_rate": 0.003, "loss": 4.1697, "step": 4119 }, { "epoch": 0.0412, "grad_norm": 0.7384604480296841, "learning_rate": 0.003, "loss": 4.1608, "step": 4120 }, { "epoch": 0.04121, "grad_norm": 0.6577587889661327, "learning_rate": 0.003, "loss": 4.1798, "step": 4121 }, { "epoch": 0.04122, "grad_norm": 0.5559068248334778, "learning_rate": 0.003, "loss": 4.1519, "step": 4122 }, { "epoch": 0.04123, "grad_norm": 0.596362393082762, "learning_rate": 0.003, "loss": 4.1613, "step": 4123 }, { "epoch": 0.04124, "grad_norm": 0.5443723484819623, "learning_rate": 0.003, "loss": 4.1386, "step": 4124 }, { "epoch": 0.04125, "grad_norm": 0.5563286329797662, "learning_rate": 0.003, "loss": 4.1617, "step": 4125 }, { "epoch": 0.04126, "grad_norm": 0.523384653689282, "learning_rate": 0.003, "loss": 4.1506, "step": 4126 }, { "epoch": 0.04127, "grad_norm": 0.4495176170519381, "learning_rate": 0.003, "loss": 4.1384, "step": 4127 }, { "epoch": 0.04128, "grad_norm": 0.5072318783524735, "learning_rate": 0.003, "loss": 4.1231, "step": 4128 }, { "epoch": 0.04129, "grad_norm": 0.5052405740035331, "learning_rate": 0.003, "loss": 4.1627, "step": 4129 }, { "epoch": 0.0413, "grad_norm": 0.575809172371195, "learning_rate": 0.003, "loss": 4.1293, "step": 4130 }, { "epoch": 0.04131, "grad_norm": 0.6143169354531615, "learning_rate": 0.003, "loss": 4.155, "step": 4131 }, { "epoch": 0.04132, "grad_norm": 0.6223256630595382, "learning_rate": 0.003, "loss": 4.1196, "step": 4132 }, { "epoch": 0.04133, "grad_norm": 0.5894212387409717, "learning_rate": 0.003, "loss": 4.161, "step": 4133 }, { "epoch": 0.04134, "grad_norm": 0.5069566465455504, "learning_rate": 0.003, "loss": 4.1487, "step": 4134 }, { "epoch": 0.04135, "grad_norm": 0.47827820618941536, "learning_rate": 0.003, "loss": 4.1296, "step": 4135 }, { "epoch": 0.04136, "grad_norm": 0.4653725022496872, "learning_rate": 0.003, "loss": 4.1252, "step": 4136 }, { "epoch": 0.04137, "grad_norm": 0.47773052230780755, "learning_rate": 0.003, "loss": 4.1658, "step": 4137 }, { "epoch": 0.04138, "grad_norm": 0.5380882786268274, "learning_rate": 0.003, "loss": 4.15, "step": 4138 }, { "epoch": 0.04139, "grad_norm": 0.6469827605874696, "learning_rate": 0.003, "loss": 4.1496, "step": 4139 }, { "epoch": 0.0414, "grad_norm": 0.6947344870174545, "learning_rate": 0.003, "loss": 4.1555, "step": 4140 }, { "epoch": 0.04141, "grad_norm": 0.748345923041137, "learning_rate": 0.003, "loss": 4.1318, "step": 4141 }, { "epoch": 0.04142, "grad_norm": 0.7319349091665185, "learning_rate": 0.003, "loss": 4.157, "step": 4142 }, { "epoch": 0.04143, "grad_norm": 0.628358425706577, "learning_rate": 0.003, "loss": 4.1389, "step": 4143 }, { "epoch": 0.04144, "grad_norm": 0.6834841445843723, "learning_rate": 0.003, "loss": 4.1412, "step": 4144 }, { "epoch": 0.04145, "grad_norm": 0.7376058073772749, "learning_rate": 0.003, "loss": 4.1295, "step": 4145 }, { "epoch": 0.04146, "grad_norm": 0.7587407207350676, "learning_rate": 0.003, "loss": 4.1526, "step": 4146 }, { "epoch": 0.04147, "grad_norm": 0.7947181963356904, "learning_rate": 0.003, "loss": 4.1641, "step": 4147 }, { "epoch": 0.04148, "grad_norm": 0.7182381427435356, "learning_rate": 0.003, "loss": 4.1204, "step": 4148 }, { "epoch": 0.04149, "grad_norm": 0.7323007766800972, "learning_rate": 0.003, "loss": 4.1348, "step": 4149 }, { "epoch": 0.0415, "grad_norm": 0.857281738554918, "learning_rate": 0.003, "loss": 4.1422, "step": 4150 }, { "epoch": 0.04151, "grad_norm": 0.9753058360121778, "learning_rate": 0.003, "loss": 4.1696, "step": 4151 }, { "epoch": 0.04152, "grad_norm": 1.0237271601242721, "learning_rate": 0.003, "loss": 4.1707, "step": 4152 }, { "epoch": 0.04153, "grad_norm": 0.9306677581318379, "learning_rate": 0.003, "loss": 4.135, "step": 4153 }, { "epoch": 0.04154, "grad_norm": 0.8382038016807482, "learning_rate": 0.003, "loss": 4.1686, "step": 4154 }, { "epoch": 0.04155, "grad_norm": 0.7006562175213897, "learning_rate": 0.003, "loss": 4.1385, "step": 4155 }, { "epoch": 0.04156, "grad_norm": 0.6295351230747147, "learning_rate": 0.003, "loss": 4.1592, "step": 4156 }, { "epoch": 0.04157, "grad_norm": 0.7233308933121653, "learning_rate": 0.003, "loss": 4.1755, "step": 4157 }, { "epoch": 0.04158, "grad_norm": 0.7870559063957332, "learning_rate": 0.003, "loss": 4.1784, "step": 4158 }, { "epoch": 0.04159, "grad_norm": 0.9866760938792348, "learning_rate": 0.003, "loss": 4.1559, "step": 4159 }, { "epoch": 0.0416, "grad_norm": 1.0470738285698136, "learning_rate": 0.003, "loss": 4.1774, "step": 4160 }, { "epoch": 0.04161, "grad_norm": 0.9307304193810675, "learning_rate": 0.003, "loss": 4.1786, "step": 4161 }, { "epoch": 0.04162, "grad_norm": 0.8721482244521436, "learning_rate": 0.003, "loss": 4.1406, "step": 4162 }, { "epoch": 0.04163, "grad_norm": 0.8678721020792026, "learning_rate": 0.003, "loss": 4.1682, "step": 4163 }, { "epoch": 0.04164, "grad_norm": 0.8226440605014521, "learning_rate": 0.003, "loss": 4.1918, "step": 4164 }, { "epoch": 0.04165, "grad_norm": 0.825975020523602, "learning_rate": 0.003, "loss": 4.1651, "step": 4165 }, { "epoch": 0.04166, "grad_norm": 0.8820368404280395, "learning_rate": 0.003, "loss": 4.1594, "step": 4166 }, { "epoch": 0.04167, "grad_norm": 0.8857197236082065, "learning_rate": 0.003, "loss": 4.1834, "step": 4167 }, { "epoch": 0.04168, "grad_norm": 0.9094188409333465, "learning_rate": 0.003, "loss": 4.1886, "step": 4168 }, { "epoch": 0.04169, "grad_norm": 0.8608115262399164, "learning_rate": 0.003, "loss": 4.1926, "step": 4169 }, { "epoch": 0.0417, "grad_norm": 0.8339384653513617, "learning_rate": 0.003, "loss": 4.1527, "step": 4170 }, { "epoch": 0.04171, "grad_norm": 0.8053429340063298, "learning_rate": 0.003, "loss": 4.1752, "step": 4171 }, { "epoch": 0.04172, "grad_norm": 0.8778996219659019, "learning_rate": 0.003, "loss": 4.1968, "step": 4172 }, { "epoch": 0.04173, "grad_norm": 0.9642055884954371, "learning_rate": 0.003, "loss": 4.1939, "step": 4173 }, { "epoch": 0.04174, "grad_norm": 1.0380118480990967, "learning_rate": 0.003, "loss": 4.1636, "step": 4174 }, { "epoch": 0.04175, "grad_norm": 0.9536264797990276, "learning_rate": 0.003, "loss": 4.2107, "step": 4175 }, { "epoch": 0.04176, "grad_norm": 0.8997005452165456, "learning_rate": 0.003, "loss": 4.1724, "step": 4176 }, { "epoch": 0.04177, "grad_norm": 0.9315086547062951, "learning_rate": 0.003, "loss": 4.151, "step": 4177 }, { "epoch": 0.04178, "grad_norm": 0.7486982272609372, "learning_rate": 0.003, "loss": 4.1756, "step": 4178 }, { "epoch": 0.04179, "grad_norm": 0.7322985384737938, "learning_rate": 0.003, "loss": 4.1539, "step": 4179 }, { "epoch": 0.0418, "grad_norm": 0.6634118485342885, "learning_rate": 0.003, "loss": 4.1634, "step": 4180 }, { "epoch": 0.04181, "grad_norm": 0.6257131964317331, "learning_rate": 0.003, "loss": 4.1609, "step": 4181 }, { "epoch": 0.04182, "grad_norm": 0.6155966650745514, "learning_rate": 0.003, "loss": 4.1442, "step": 4182 }, { "epoch": 0.04183, "grad_norm": 0.5888122942739422, "learning_rate": 0.003, "loss": 4.147, "step": 4183 }, { "epoch": 0.04184, "grad_norm": 0.48396597336103814, "learning_rate": 0.003, "loss": 4.1975, "step": 4184 }, { "epoch": 0.04185, "grad_norm": 0.46085090618271063, "learning_rate": 0.003, "loss": 4.1762, "step": 4185 }, { "epoch": 0.04186, "grad_norm": 0.40263981014725103, "learning_rate": 0.003, "loss": 4.1172, "step": 4186 }, { "epoch": 0.04187, "grad_norm": 0.4072854325073614, "learning_rate": 0.003, "loss": 4.1673, "step": 4187 }, { "epoch": 0.04188, "grad_norm": 0.3949031488342765, "learning_rate": 0.003, "loss": 4.129, "step": 4188 }, { "epoch": 0.04189, "grad_norm": 0.4614400695553244, "learning_rate": 0.003, "loss": 4.1471, "step": 4189 }, { "epoch": 0.0419, "grad_norm": 0.591341946932479, "learning_rate": 0.003, "loss": 4.1422, "step": 4190 }, { "epoch": 0.04191, "grad_norm": 0.6794599625720497, "learning_rate": 0.003, "loss": 4.1502, "step": 4191 }, { "epoch": 0.04192, "grad_norm": 0.8639843192836074, "learning_rate": 0.003, "loss": 4.1353, "step": 4192 }, { "epoch": 0.04193, "grad_norm": 0.9261826031541774, "learning_rate": 0.003, "loss": 4.1201, "step": 4193 }, { "epoch": 0.04194, "grad_norm": 0.6816042869205202, "learning_rate": 0.003, "loss": 4.1131, "step": 4194 }, { "epoch": 0.04195, "grad_norm": 0.4846458487192246, "learning_rate": 0.003, "loss": 4.1594, "step": 4195 }, { "epoch": 0.04196, "grad_norm": 0.6733457250112305, "learning_rate": 0.003, "loss": 4.1205, "step": 4196 }, { "epoch": 0.04197, "grad_norm": 0.8628147041804719, "learning_rate": 0.003, "loss": 4.1368, "step": 4197 }, { "epoch": 0.04198, "grad_norm": 0.8069270994807441, "learning_rate": 0.003, "loss": 4.1332, "step": 4198 }, { "epoch": 0.04199, "grad_norm": 0.5843819059317946, "learning_rate": 0.003, "loss": 4.1649, "step": 4199 }, { "epoch": 0.042, "grad_norm": 0.6524001973236165, "learning_rate": 0.003, "loss": 4.1484, "step": 4200 }, { "epoch": 0.04201, "grad_norm": 0.7455750389366074, "learning_rate": 0.003, "loss": 4.1283, "step": 4201 }, { "epoch": 0.04202, "grad_norm": 0.8073311014615949, "learning_rate": 0.003, "loss": 4.1482, "step": 4202 }, { "epoch": 0.04203, "grad_norm": 0.6797266710402026, "learning_rate": 0.003, "loss": 4.165, "step": 4203 }, { "epoch": 0.04204, "grad_norm": 0.6818305150769544, "learning_rate": 0.003, "loss": 4.1288, "step": 4204 }, { "epoch": 0.04205, "grad_norm": 0.5915119548922819, "learning_rate": 0.003, "loss": 4.1526, "step": 4205 }, { "epoch": 0.04206, "grad_norm": 0.5385651548116344, "learning_rate": 0.003, "loss": 4.1466, "step": 4206 }, { "epoch": 0.04207, "grad_norm": 0.48635296552930585, "learning_rate": 0.003, "loss": 4.1349, "step": 4207 }, { "epoch": 0.04208, "grad_norm": 0.501480728550346, "learning_rate": 0.003, "loss": 4.1426, "step": 4208 }, { "epoch": 0.04209, "grad_norm": 0.5039540680425979, "learning_rate": 0.003, "loss": 4.1448, "step": 4209 }, { "epoch": 0.0421, "grad_norm": 0.5190305179039434, "learning_rate": 0.003, "loss": 4.1031, "step": 4210 }, { "epoch": 0.04211, "grad_norm": 0.5620311787567622, "learning_rate": 0.003, "loss": 4.1438, "step": 4211 }, { "epoch": 0.04212, "grad_norm": 0.5635968224621314, "learning_rate": 0.003, "loss": 4.1366, "step": 4212 }, { "epoch": 0.04213, "grad_norm": 0.5728113060887386, "learning_rate": 0.003, "loss": 4.1176, "step": 4213 }, { "epoch": 0.04214, "grad_norm": 0.548258707730465, "learning_rate": 0.003, "loss": 4.1223, "step": 4214 }, { "epoch": 0.04215, "grad_norm": 0.599545067228449, "learning_rate": 0.003, "loss": 4.1464, "step": 4215 }, { "epoch": 0.04216, "grad_norm": 0.5748773014990753, "learning_rate": 0.003, "loss": 4.1096, "step": 4216 }, { "epoch": 0.04217, "grad_norm": 0.48220599086889576, "learning_rate": 0.003, "loss": 4.1305, "step": 4217 }, { "epoch": 0.04218, "grad_norm": 0.5634734523353527, "learning_rate": 0.003, "loss": 4.1642, "step": 4218 }, { "epoch": 0.04219, "grad_norm": 0.6074224940320497, "learning_rate": 0.003, "loss": 4.0856, "step": 4219 }, { "epoch": 0.0422, "grad_norm": 0.6971835957336069, "learning_rate": 0.003, "loss": 4.155, "step": 4220 }, { "epoch": 0.04221, "grad_norm": 0.8744022212709924, "learning_rate": 0.003, "loss": 4.1343, "step": 4221 }, { "epoch": 0.04222, "grad_norm": 1.091644449624445, "learning_rate": 0.003, "loss": 4.1115, "step": 4222 }, { "epoch": 0.04223, "grad_norm": 0.9670759098939631, "learning_rate": 0.003, "loss": 4.1714, "step": 4223 }, { "epoch": 0.04224, "grad_norm": 1.0117671228316643, "learning_rate": 0.003, "loss": 4.1837, "step": 4224 }, { "epoch": 0.04225, "grad_norm": 0.9113076215312503, "learning_rate": 0.003, "loss": 4.1713, "step": 4225 }, { "epoch": 0.04226, "grad_norm": 0.8960930319481113, "learning_rate": 0.003, "loss": 4.1523, "step": 4226 }, { "epoch": 0.04227, "grad_norm": 0.9167424550159433, "learning_rate": 0.003, "loss": 4.156, "step": 4227 }, { "epoch": 0.04228, "grad_norm": 0.967478343035116, "learning_rate": 0.003, "loss": 4.1417, "step": 4228 }, { "epoch": 0.04229, "grad_norm": 0.8656039877374752, "learning_rate": 0.003, "loss": 4.1711, "step": 4229 }, { "epoch": 0.0423, "grad_norm": 0.8626263746774161, "learning_rate": 0.003, "loss": 4.1599, "step": 4230 }, { "epoch": 0.04231, "grad_norm": 0.7823486614059999, "learning_rate": 0.003, "loss": 4.1578, "step": 4231 }, { "epoch": 0.04232, "grad_norm": 0.6738192762506559, "learning_rate": 0.003, "loss": 4.1671, "step": 4232 }, { "epoch": 0.04233, "grad_norm": 0.6857804646466646, "learning_rate": 0.003, "loss": 4.119, "step": 4233 }, { "epoch": 0.04234, "grad_norm": 0.7651833364067767, "learning_rate": 0.003, "loss": 4.1545, "step": 4234 }, { "epoch": 0.04235, "grad_norm": 0.8557695250207406, "learning_rate": 0.003, "loss": 4.1383, "step": 4235 }, { "epoch": 0.04236, "grad_norm": 0.8879388664685244, "learning_rate": 0.003, "loss": 4.1514, "step": 4236 }, { "epoch": 0.04237, "grad_norm": 0.913885936433401, "learning_rate": 0.003, "loss": 4.1255, "step": 4237 }, { "epoch": 0.04238, "grad_norm": 0.8386396394298068, "learning_rate": 0.003, "loss": 4.1852, "step": 4238 }, { "epoch": 0.04239, "grad_norm": 0.8690235809658854, "learning_rate": 0.003, "loss": 4.1661, "step": 4239 }, { "epoch": 0.0424, "grad_norm": 0.8289113096147429, "learning_rate": 0.003, "loss": 4.1637, "step": 4240 }, { "epoch": 0.04241, "grad_norm": 0.7332475305569869, "learning_rate": 0.003, "loss": 4.1777, "step": 4241 }, { "epoch": 0.04242, "grad_norm": 0.6138420318092145, "learning_rate": 0.003, "loss": 4.1602, "step": 4242 }, { "epoch": 0.04243, "grad_norm": 0.6214280439461072, "learning_rate": 0.003, "loss": 4.1436, "step": 4243 }, { "epoch": 0.04244, "grad_norm": 0.618625833924493, "learning_rate": 0.003, "loss": 4.1493, "step": 4244 }, { "epoch": 0.04245, "grad_norm": 0.5938287908990628, "learning_rate": 0.003, "loss": 4.1435, "step": 4245 }, { "epoch": 0.04246, "grad_norm": 0.5233831349524802, "learning_rate": 0.003, "loss": 4.1675, "step": 4246 }, { "epoch": 0.04247, "grad_norm": 0.4617503229970775, "learning_rate": 0.003, "loss": 4.138, "step": 4247 }, { "epoch": 0.04248, "grad_norm": 0.4580443757090383, "learning_rate": 0.003, "loss": 4.1323, "step": 4248 }, { "epoch": 0.04249, "grad_norm": 0.4036456069229838, "learning_rate": 0.003, "loss": 4.1278, "step": 4249 }, { "epoch": 0.0425, "grad_norm": 0.4380751494962389, "learning_rate": 0.003, "loss": 4.1444, "step": 4250 }, { "epoch": 0.04251, "grad_norm": 0.5168262583557149, "learning_rate": 0.003, "loss": 4.1054, "step": 4251 }, { "epoch": 0.04252, "grad_norm": 0.6808353915492127, "learning_rate": 0.003, "loss": 4.1541, "step": 4252 }, { "epoch": 0.04253, "grad_norm": 0.8432737064416014, "learning_rate": 0.003, "loss": 4.1227, "step": 4253 }, { "epoch": 0.04254, "grad_norm": 0.852211211018834, "learning_rate": 0.003, "loss": 4.1472, "step": 4254 }, { "epoch": 0.04255, "grad_norm": 0.6240445618140698, "learning_rate": 0.003, "loss": 4.1184, "step": 4255 }, { "epoch": 0.04256, "grad_norm": 0.4411537713654708, "learning_rate": 0.003, "loss": 4.1255, "step": 4256 }, { "epoch": 0.04257, "grad_norm": 0.5991668514983637, "learning_rate": 0.003, "loss": 4.1492, "step": 4257 }, { "epoch": 0.04258, "grad_norm": 0.7037912926105352, "learning_rate": 0.003, "loss": 4.1538, "step": 4258 }, { "epoch": 0.04259, "grad_norm": 0.6762366578254241, "learning_rate": 0.003, "loss": 4.1198, "step": 4259 }, { "epoch": 0.0426, "grad_norm": 0.6629398663628234, "learning_rate": 0.003, "loss": 4.1354, "step": 4260 }, { "epoch": 0.04261, "grad_norm": 0.7122714093545782, "learning_rate": 0.003, "loss": 4.1495, "step": 4261 }, { "epoch": 0.04262, "grad_norm": 0.6594037833976022, "learning_rate": 0.003, "loss": 4.1673, "step": 4262 }, { "epoch": 0.04263, "grad_norm": 0.6354881188170681, "learning_rate": 0.003, "loss": 4.1474, "step": 4263 }, { "epoch": 0.04264, "grad_norm": 0.6654557887834439, "learning_rate": 0.003, "loss": 4.1598, "step": 4264 }, { "epoch": 0.04265, "grad_norm": 0.6938449312414295, "learning_rate": 0.003, "loss": 4.1586, "step": 4265 }, { "epoch": 0.04266, "grad_norm": 0.7221897294325286, "learning_rate": 0.003, "loss": 4.1403, "step": 4266 }, { "epoch": 0.04267, "grad_norm": 0.7436634345125996, "learning_rate": 0.003, "loss": 4.1158, "step": 4267 }, { "epoch": 0.04268, "grad_norm": 0.8458877078713634, "learning_rate": 0.003, "loss": 4.1612, "step": 4268 }, { "epoch": 0.04269, "grad_norm": 0.8904618229753511, "learning_rate": 0.003, "loss": 4.13, "step": 4269 }, { "epoch": 0.0427, "grad_norm": 0.7112249953312418, "learning_rate": 0.003, "loss": 4.1401, "step": 4270 }, { "epoch": 0.04271, "grad_norm": 0.7216953907198649, "learning_rate": 0.003, "loss": 4.1547, "step": 4271 }, { "epoch": 0.04272, "grad_norm": 0.7847449594647588, "learning_rate": 0.003, "loss": 4.1462, "step": 4272 }, { "epoch": 0.04273, "grad_norm": 1.004517275551493, "learning_rate": 0.003, "loss": 4.1318, "step": 4273 }, { "epoch": 0.04274, "grad_norm": 1.255471199279273, "learning_rate": 0.003, "loss": 4.1681, "step": 4274 }, { "epoch": 0.04275, "grad_norm": 0.6280388986370492, "learning_rate": 0.003, "loss": 4.1565, "step": 4275 }, { "epoch": 0.04276, "grad_norm": 0.6763844718810441, "learning_rate": 0.003, "loss": 4.1743, "step": 4276 }, { "epoch": 0.04277, "grad_norm": 0.9807377268843779, "learning_rate": 0.003, "loss": 4.1684, "step": 4277 }, { "epoch": 0.04278, "grad_norm": 1.0197483171024961, "learning_rate": 0.003, "loss": 4.1775, "step": 4278 }, { "epoch": 0.04279, "grad_norm": 0.9328879956310648, "learning_rate": 0.003, "loss": 4.1556, "step": 4279 }, { "epoch": 0.0428, "grad_norm": 0.7779948668786022, "learning_rate": 0.003, "loss": 4.1299, "step": 4280 }, { "epoch": 0.04281, "grad_norm": 0.7431305171276716, "learning_rate": 0.003, "loss": 4.1851, "step": 4281 }, { "epoch": 0.04282, "grad_norm": 0.6283341114449361, "learning_rate": 0.003, "loss": 4.1564, "step": 4282 }, { "epoch": 0.04283, "grad_norm": 0.6394182134064753, "learning_rate": 0.003, "loss": 4.1901, "step": 4283 }, { "epoch": 0.04284, "grad_norm": 0.6758957287997666, "learning_rate": 0.003, "loss": 4.1437, "step": 4284 }, { "epoch": 0.04285, "grad_norm": 0.6131823431058723, "learning_rate": 0.003, "loss": 4.1543, "step": 4285 }, { "epoch": 0.04286, "grad_norm": 0.6284313302797474, "learning_rate": 0.003, "loss": 4.1751, "step": 4286 }, { "epoch": 0.04287, "grad_norm": 0.6980874101445044, "learning_rate": 0.003, "loss": 4.1541, "step": 4287 }, { "epoch": 0.04288, "grad_norm": 0.696667601658298, "learning_rate": 0.003, "loss": 4.1361, "step": 4288 }, { "epoch": 0.04289, "grad_norm": 0.5891879790472746, "learning_rate": 0.003, "loss": 4.1405, "step": 4289 }, { "epoch": 0.0429, "grad_norm": 0.6679196724071268, "learning_rate": 0.003, "loss": 4.127, "step": 4290 }, { "epoch": 0.04291, "grad_norm": 0.7132152030025544, "learning_rate": 0.003, "loss": 4.1321, "step": 4291 }, { "epoch": 0.04292, "grad_norm": 0.7831211413634629, "learning_rate": 0.003, "loss": 4.141, "step": 4292 }, { "epoch": 0.04293, "grad_norm": 0.7420395140404202, "learning_rate": 0.003, "loss": 4.1725, "step": 4293 }, { "epoch": 0.04294, "grad_norm": 0.664819817807019, "learning_rate": 0.003, "loss": 4.1508, "step": 4294 }, { "epoch": 0.04295, "grad_norm": 0.6533174489648946, "learning_rate": 0.003, "loss": 4.1223, "step": 4295 }, { "epoch": 0.04296, "grad_norm": 0.6539485681842259, "learning_rate": 0.003, "loss": 4.1205, "step": 4296 }, { "epoch": 0.04297, "grad_norm": 0.5445244387293794, "learning_rate": 0.003, "loss": 4.1064, "step": 4297 }, { "epoch": 0.04298, "grad_norm": 0.509532772235553, "learning_rate": 0.003, "loss": 4.1518, "step": 4298 }, { "epoch": 0.04299, "grad_norm": 0.45809506613975526, "learning_rate": 0.003, "loss": 4.1361, "step": 4299 }, { "epoch": 0.043, "grad_norm": 0.4552028711707873, "learning_rate": 0.003, "loss": 4.1165, "step": 4300 }, { "epoch": 0.04301, "grad_norm": 0.5511852682855607, "learning_rate": 0.003, "loss": 4.1668, "step": 4301 }, { "epoch": 0.04302, "grad_norm": 0.7137895720159015, "learning_rate": 0.003, "loss": 4.1605, "step": 4302 }, { "epoch": 0.04303, "grad_norm": 0.968773809309932, "learning_rate": 0.003, "loss": 4.1573, "step": 4303 }, { "epoch": 0.04304, "grad_norm": 1.0008705380109228, "learning_rate": 0.003, "loss": 4.1581, "step": 4304 }, { "epoch": 0.04305, "grad_norm": 0.7364014697863502, "learning_rate": 0.003, "loss": 4.1497, "step": 4305 }, { "epoch": 0.04306, "grad_norm": 0.8422192934651608, "learning_rate": 0.003, "loss": 4.1268, "step": 4306 }, { "epoch": 0.04307, "grad_norm": 0.8960004698755968, "learning_rate": 0.003, "loss": 4.1409, "step": 4307 }, { "epoch": 0.04308, "grad_norm": 0.8900888278943823, "learning_rate": 0.003, "loss": 4.1516, "step": 4308 }, { "epoch": 0.04309, "grad_norm": 0.7851447078463367, "learning_rate": 0.003, "loss": 4.1418, "step": 4309 }, { "epoch": 0.0431, "grad_norm": 0.8003446758283859, "learning_rate": 0.003, "loss": 4.1648, "step": 4310 }, { "epoch": 0.04311, "grad_norm": 0.9468061968202307, "learning_rate": 0.003, "loss": 4.1711, "step": 4311 }, { "epoch": 0.04312, "grad_norm": 0.8521671702866287, "learning_rate": 0.003, "loss": 4.149, "step": 4312 }, { "epoch": 0.04313, "grad_norm": 0.7923961445552284, "learning_rate": 0.003, "loss": 4.1585, "step": 4313 }, { "epoch": 0.04314, "grad_norm": 0.739461542428857, "learning_rate": 0.003, "loss": 4.1432, "step": 4314 }, { "epoch": 0.04315, "grad_norm": 0.8509444047900186, "learning_rate": 0.003, "loss": 4.1727, "step": 4315 }, { "epoch": 0.04316, "grad_norm": 0.8601049555561022, "learning_rate": 0.003, "loss": 4.1754, "step": 4316 }, { "epoch": 0.04317, "grad_norm": 0.7623314110787873, "learning_rate": 0.003, "loss": 4.1871, "step": 4317 }, { "epoch": 0.04318, "grad_norm": 0.6587491558124051, "learning_rate": 0.003, "loss": 4.1227, "step": 4318 }, { "epoch": 0.04319, "grad_norm": 0.5849951298753531, "learning_rate": 0.003, "loss": 4.1174, "step": 4319 }, { "epoch": 0.0432, "grad_norm": 0.57401029054835, "learning_rate": 0.003, "loss": 4.1395, "step": 4320 }, { "epoch": 0.04321, "grad_norm": 0.6311486059735384, "learning_rate": 0.003, "loss": 4.1439, "step": 4321 }, { "epoch": 0.04322, "grad_norm": 0.6281833102961318, "learning_rate": 0.003, "loss": 4.1751, "step": 4322 }, { "epoch": 0.04323, "grad_norm": 0.7033374538455368, "learning_rate": 0.003, "loss": 4.1459, "step": 4323 }, { "epoch": 0.04324, "grad_norm": 0.6437486136063232, "learning_rate": 0.003, "loss": 4.1378, "step": 4324 }, { "epoch": 0.04325, "grad_norm": 0.6232679102675521, "learning_rate": 0.003, "loss": 4.1663, "step": 4325 }, { "epoch": 0.04326, "grad_norm": 0.6515618951702632, "learning_rate": 0.003, "loss": 4.1454, "step": 4326 }, { "epoch": 0.04327, "grad_norm": 0.6736681660679901, "learning_rate": 0.003, "loss": 4.1287, "step": 4327 }, { "epoch": 0.04328, "grad_norm": 0.719714363226295, "learning_rate": 0.003, "loss": 4.1434, "step": 4328 }, { "epoch": 0.04329, "grad_norm": 0.8360323655381567, "learning_rate": 0.003, "loss": 4.164, "step": 4329 }, { "epoch": 0.0433, "grad_norm": 0.8102704190603504, "learning_rate": 0.003, "loss": 4.1354, "step": 4330 }, { "epoch": 0.04331, "grad_norm": 0.7636451395661018, "learning_rate": 0.003, "loss": 4.1319, "step": 4331 }, { "epoch": 0.04332, "grad_norm": 0.7789172902676315, "learning_rate": 0.003, "loss": 4.179, "step": 4332 }, { "epoch": 0.04333, "grad_norm": 0.6881019378708992, "learning_rate": 0.003, "loss": 4.1475, "step": 4333 }, { "epoch": 0.04334, "grad_norm": 0.5741071284174022, "learning_rate": 0.003, "loss": 4.1328, "step": 4334 }, { "epoch": 0.04335, "grad_norm": 0.6016578373290047, "learning_rate": 0.003, "loss": 4.1203, "step": 4335 }, { "epoch": 0.04336, "grad_norm": 0.6309338575418205, "learning_rate": 0.003, "loss": 4.1558, "step": 4336 }, { "epoch": 0.04337, "grad_norm": 0.5725441440390032, "learning_rate": 0.003, "loss": 4.1395, "step": 4337 }, { "epoch": 0.04338, "grad_norm": 0.5493515413414659, "learning_rate": 0.003, "loss": 4.1403, "step": 4338 }, { "epoch": 0.04339, "grad_norm": 0.5145808044001364, "learning_rate": 0.003, "loss": 4.1442, "step": 4339 }, { "epoch": 0.0434, "grad_norm": 0.5155260956336473, "learning_rate": 0.003, "loss": 4.1247, "step": 4340 }, { "epoch": 0.04341, "grad_norm": 0.49209801146871524, "learning_rate": 0.003, "loss": 4.1362, "step": 4341 }, { "epoch": 0.04342, "grad_norm": 0.5776994696106369, "learning_rate": 0.003, "loss": 4.1231, "step": 4342 }, { "epoch": 0.04343, "grad_norm": 0.8551512196078731, "learning_rate": 0.003, "loss": 4.1689, "step": 4343 }, { "epoch": 0.04344, "grad_norm": 1.0783725488561569, "learning_rate": 0.003, "loss": 4.124, "step": 4344 }, { "epoch": 0.04345, "grad_norm": 0.9655180256016967, "learning_rate": 0.003, "loss": 4.1629, "step": 4345 }, { "epoch": 0.04346, "grad_norm": 0.7582683532329971, "learning_rate": 0.003, "loss": 4.1515, "step": 4346 }, { "epoch": 0.04347, "grad_norm": 0.6941505792622187, "learning_rate": 0.003, "loss": 4.1535, "step": 4347 }, { "epoch": 0.04348, "grad_norm": 0.8405157089264003, "learning_rate": 0.003, "loss": 4.1369, "step": 4348 }, { "epoch": 0.04349, "grad_norm": 0.8523333890856811, "learning_rate": 0.003, "loss": 4.1558, "step": 4349 }, { "epoch": 0.0435, "grad_norm": 0.7396539502996479, "learning_rate": 0.003, "loss": 4.1379, "step": 4350 }, { "epoch": 0.04351, "grad_norm": 0.6266986999030948, "learning_rate": 0.003, "loss": 4.15, "step": 4351 }, { "epoch": 0.04352, "grad_norm": 0.5282839844073566, "learning_rate": 0.003, "loss": 4.1624, "step": 4352 }, { "epoch": 0.04353, "grad_norm": 0.5005795044157166, "learning_rate": 0.003, "loss": 4.1304, "step": 4353 }, { "epoch": 0.04354, "grad_norm": 0.4851237319282445, "learning_rate": 0.003, "loss": 4.134, "step": 4354 }, { "epoch": 0.04355, "grad_norm": 0.5595788754843884, "learning_rate": 0.003, "loss": 4.1409, "step": 4355 }, { "epoch": 0.04356, "grad_norm": 0.5667796441106236, "learning_rate": 0.003, "loss": 4.1467, "step": 4356 }, { "epoch": 0.04357, "grad_norm": 0.5983610407473247, "learning_rate": 0.003, "loss": 4.1307, "step": 4357 }, { "epoch": 0.04358, "grad_norm": 0.6456602317217889, "learning_rate": 0.003, "loss": 4.1221, "step": 4358 }, { "epoch": 0.04359, "grad_norm": 0.6069596406446627, "learning_rate": 0.003, "loss": 4.1417, "step": 4359 }, { "epoch": 0.0436, "grad_norm": 0.5908374036987593, "learning_rate": 0.003, "loss": 4.144, "step": 4360 }, { "epoch": 0.04361, "grad_norm": 0.6735288968429441, "learning_rate": 0.003, "loss": 4.1113, "step": 4361 }, { "epoch": 0.04362, "grad_norm": 0.7257053819990018, "learning_rate": 0.003, "loss": 4.1256, "step": 4362 }, { "epoch": 0.04363, "grad_norm": 0.6914693554965775, "learning_rate": 0.003, "loss": 4.1294, "step": 4363 }, { "epoch": 0.04364, "grad_norm": 0.7451580269204352, "learning_rate": 0.003, "loss": 4.1478, "step": 4364 }, { "epoch": 0.04365, "grad_norm": 0.7858248696802116, "learning_rate": 0.003, "loss": 4.1514, "step": 4365 }, { "epoch": 0.04366, "grad_norm": 0.663121974241136, "learning_rate": 0.003, "loss": 4.1013, "step": 4366 }, { "epoch": 0.04367, "grad_norm": 0.7175426458581545, "learning_rate": 0.003, "loss": 4.1542, "step": 4367 }, { "epoch": 0.04368, "grad_norm": 0.7351488980711208, "learning_rate": 0.003, "loss": 4.1422, "step": 4368 }, { "epoch": 0.04369, "grad_norm": 0.8536889689371039, "learning_rate": 0.003, "loss": 4.1453, "step": 4369 }, { "epoch": 0.0437, "grad_norm": 0.854111892194005, "learning_rate": 0.003, "loss": 4.1678, "step": 4370 }, { "epoch": 0.04371, "grad_norm": 1.046284154850743, "learning_rate": 0.003, "loss": 4.1575, "step": 4371 }, { "epoch": 0.04372, "grad_norm": 1.2262372791586333, "learning_rate": 0.003, "loss": 4.1517, "step": 4372 }, { "epoch": 0.04373, "grad_norm": 0.8968734983342949, "learning_rate": 0.003, "loss": 4.1673, "step": 4373 }, { "epoch": 0.04374, "grad_norm": 0.9753259228506409, "learning_rate": 0.003, "loss": 4.2155, "step": 4374 }, { "epoch": 0.04375, "grad_norm": 0.9920981237100678, "learning_rate": 0.003, "loss": 4.1324, "step": 4375 }, { "epoch": 0.04376, "grad_norm": 1.0294125384070931, "learning_rate": 0.003, "loss": 4.1801, "step": 4376 }, { "epoch": 0.04377, "grad_norm": 1.0068899366919466, "learning_rate": 0.003, "loss": 4.2068, "step": 4377 }, { "epoch": 0.04378, "grad_norm": 1.0341191509965633, "learning_rate": 0.003, "loss": 4.191, "step": 4378 }, { "epoch": 0.04379, "grad_norm": 0.9717889790676081, "learning_rate": 0.003, "loss": 4.2061, "step": 4379 }, { "epoch": 0.0438, "grad_norm": 1.0471882312294167, "learning_rate": 0.003, "loss": 4.1807, "step": 4380 }, { "epoch": 0.04381, "grad_norm": 0.8790592687859848, "learning_rate": 0.003, "loss": 4.1637, "step": 4381 }, { "epoch": 0.04382, "grad_norm": 0.8968294394751086, "learning_rate": 0.003, "loss": 4.1774, "step": 4382 }, { "epoch": 0.04383, "grad_norm": 0.8820678241101328, "learning_rate": 0.003, "loss": 4.157, "step": 4383 }, { "epoch": 0.04384, "grad_norm": 1.0214298134194522, "learning_rate": 0.003, "loss": 4.1664, "step": 4384 }, { "epoch": 0.04385, "grad_norm": 0.900876137883741, "learning_rate": 0.003, "loss": 4.2023, "step": 4385 }, { "epoch": 0.04386, "grad_norm": 0.7692038806596491, "learning_rate": 0.003, "loss": 4.1737, "step": 4386 }, { "epoch": 0.04387, "grad_norm": 0.653702371460772, "learning_rate": 0.003, "loss": 4.1914, "step": 4387 }, { "epoch": 0.04388, "grad_norm": 0.6492229267746281, "learning_rate": 0.003, "loss": 4.1506, "step": 4388 }, { "epoch": 0.04389, "grad_norm": 0.5786656683989158, "learning_rate": 0.003, "loss": 4.179, "step": 4389 }, { "epoch": 0.0439, "grad_norm": 0.5012080835531559, "learning_rate": 0.003, "loss": 4.1702, "step": 4390 }, { "epoch": 0.04391, "grad_norm": 0.48775107179113014, "learning_rate": 0.003, "loss": 4.1402, "step": 4391 }, { "epoch": 0.04392, "grad_norm": 0.46374848591785856, "learning_rate": 0.003, "loss": 4.1462, "step": 4392 }, { "epoch": 0.04393, "grad_norm": 0.450356925230617, "learning_rate": 0.003, "loss": 4.1327, "step": 4393 }, { "epoch": 0.04394, "grad_norm": 0.4674972403264732, "learning_rate": 0.003, "loss": 4.1498, "step": 4394 }, { "epoch": 0.04395, "grad_norm": 0.5893115115334077, "learning_rate": 0.003, "loss": 4.1342, "step": 4395 }, { "epoch": 0.04396, "grad_norm": 0.7658769835507094, "learning_rate": 0.003, "loss": 4.1118, "step": 4396 }, { "epoch": 0.04397, "grad_norm": 0.9383339109696239, "learning_rate": 0.003, "loss": 4.1344, "step": 4397 }, { "epoch": 0.04398, "grad_norm": 0.8777679801246571, "learning_rate": 0.003, "loss": 4.1501, "step": 4398 }, { "epoch": 0.04399, "grad_norm": 0.7720983136183994, "learning_rate": 0.003, "loss": 4.1556, "step": 4399 }, { "epoch": 0.044, "grad_norm": 0.7229759596282145, "learning_rate": 0.003, "loss": 4.1478, "step": 4400 }, { "epoch": 0.04401, "grad_norm": 0.8501411177864325, "learning_rate": 0.003, "loss": 4.1629, "step": 4401 }, { "epoch": 0.04402, "grad_norm": 0.8319303801252986, "learning_rate": 0.003, "loss": 4.172, "step": 4402 }, { "epoch": 0.04403, "grad_norm": 0.6805684530525856, "learning_rate": 0.003, "loss": 4.1317, "step": 4403 }, { "epoch": 0.04404, "grad_norm": 0.5579714343751891, "learning_rate": 0.003, "loss": 4.1573, "step": 4404 }, { "epoch": 0.04405, "grad_norm": 0.6272316247141385, "learning_rate": 0.003, "loss": 4.151, "step": 4405 }, { "epoch": 0.04406, "grad_norm": 0.7189270603588521, "learning_rate": 0.003, "loss": 4.1491, "step": 4406 }, { "epoch": 0.04407, "grad_norm": 0.762459715751194, "learning_rate": 0.003, "loss": 4.1227, "step": 4407 }, { "epoch": 0.04408, "grad_norm": 0.7250097639288197, "learning_rate": 0.003, "loss": 4.1272, "step": 4408 }, { "epoch": 0.04409, "grad_norm": 0.7106703945924645, "learning_rate": 0.003, "loss": 4.1606, "step": 4409 }, { "epoch": 0.0441, "grad_norm": 0.5937514209420578, "learning_rate": 0.003, "loss": 4.1417, "step": 4410 }, { "epoch": 0.04411, "grad_norm": 0.6265606257146901, "learning_rate": 0.003, "loss": 4.124, "step": 4411 }, { "epoch": 0.04412, "grad_norm": 0.684509287657226, "learning_rate": 0.003, "loss": 4.114, "step": 4412 }, { "epoch": 0.04413, "grad_norm": 0.7827609729511189, "learning_rate": 0.003, "loss": 4.1251, "step": 4413 }, { "epoch": 0.04414, "grad_norm": 0.8264979154885277, "learning_rate": 0.003, "loss": 4.1806, "step": 4414 }, { "epoch": 0.04415, "grad_norm": 0.7365639173006763, "learning_rate": 0.003, "loss": 4.1413, "step": 4415 }, { "epoch": 0.04416, "grad_norm": 0.5988319699730835, "learning_rate": 0.003, "loss": 4.1256, "step": 4416 }, { "epoch": 0.04417, "grad_norm": 0.6185388120705024, "learning_rate": 0.003, "loss": 4.1033, "step": 4417 }, { "epoch": 0.04418, "grad_norm": 0.7024277083040185, "learning_rate": 0.003, "loss": 4.145, "step": 4418 }, { "epoch": 0.04419, "grad_norm": 0.6522881055559766, "learning_rate": 0.003, "loss": 4.1651, "step": 4419 }, { "epoch": 0.0442, "grad_norm": 0.6345542061505771, "learning_rate": 0.003, "loss": 4.1337, "step": 4420 }, { "epoch": 0.04421, "grad_norm": 0.6652846957230868, "learning_rate": 0.003, "loss": 4.1668, "step": 4421 }, { "epoch": 0.04422, "grad_norm": 0.7005501321538639, "learning_rate": 0.003, "loss": 4.1322, "step": 4422 }, { "epoch": 0.04423, "grad_norm": 0.7044884290461375, "learning_rate": 0.003, "loss": 4.1513, "step": 4423 }, { "epoch": 0.04424, "grad_norm": 0.6741401349701589, "learning_rate": 0.003, "loss": 4.1007, "step": 4424 }, { "epoch": 0.04425, "grad_norm": 0.6934002596368363, "learning_rate": 0.003, "loss": 4.1257, "step": 4425 }, { "epoch": 0.04426, "grad_norm": 0.6003050918470045, "learning_rate": 0.003, "loss": 4.1458, "step": 4426 }, { "epoch": 0.04427, "grad_norm": 0.6672810210726654, "learning_rate": 0.003, "loss": 4.1355, "step": 4427 }, { "epoch": 0.04428, "grad_norm": 0.6318153966455053, "learning_rate": 0.003, "loss": 4.1305, "step": 4428 }, { "epoch": 0.04429, "grad_norm": 0.623912292564508, "learning_rate": 0.003, "loss": 4.1717, "step": 4429 }, { "epoch": 0.0443, "grad_norm": 0.7110054552912546, "learning_rate": 0.003, "loss": 4.132, "step": 4430 }, { "epoch": 0.04431, "grad_norm": 0.7246952184785321, "learning_rate": 0.003, "loss": 4.157, "step": 4431 }, { "epoch": 0.04432, "grad_norm": 0.7298502791104271, "learning_rate": 0.003, "loss": 4.1607, "step": 4432 }, { "epoch": 0.04433, "grad_norm": 0.8250488623045142, "learning_rate": 0.003, "loss": 4.1389, "step": 4433 }, { "epoch": 0.04434, "grad_norm": 0.9624884861403035, "learning_rate": 0.003, "loss": 4.1442, "step": 4434 }, { "epoch": 0.04435, "grad_norm": 0.9889176292713632, "learning_rate": 0.003, "loss": 4.1624, "step": 4435 }, { "epoch": 0.04436, "grad_norm": 0.9593070985375517, "learning_rate": 0.003, "loss": 4.1373, "step": 4436 }, { "epoch": 0.04437, "grad_norm": 0.754019474989507, "learning_rate": 0.003, "loss": 4.1545, "step": 4437 }, { "epoch": 0.04438, "grad_norm": 0.6112092782747661, "learning_rate": 0.003, "loss": 4.1893, "step": 4438 }, { "epoch": 0.04439, "grad_norm": 0.8008555881950424, "learning_rate": 0.003, "loss": 4.1584, "step": 4439 }, { "epoch": 0.0444, "grad_norm": 0.8796518350119269, "learning_rate": 0.003, "loss": 4.1203, "step": 4440 }, { "epoch": 0.04441, "grad_norm": 0.881979649964416, "learning_rate": 0.003, "loss": 4.155, "step": 4441 }, { "epoch": 0.04442, "grad_norm": 0.8372483887218065, "learning_rate": 0.003, "loss": 4.1465, "step": 4442 }, { "epoch": 0.04443, "grad_norm": 0.6759359878775629, "learning_rate": 0.003, "loss": 4.1188, "step": 4443 }, { "epoch": 0.04444, "grad_norm": 0.6554603066254844, "learning_rate": 0.003, "loss": 4.12, "step": 4444 }, { "epoch": 0.04445, "grad_norm": 0.5717002727777339, "learning_rate": 0.003, "loss": 4.1477, "step": 4445 }, { "epoch": 0.04446, "grad_norm": 0.49898785075563756, "learning_rate": 0.003, "loss": 4.1144, "step": 4446 }, { "epoch": 0.04447, "grad_norm": 0.5124508174653755, "learning_rate": 0.003, "loss": 4.1181, "step": 4447 }, { "epoch": 0.04448, "grad_norm": 0.46026046835883233, "learning_rate": 0.003, "loss": 4.1155, "step": 4448 }, { "epoch": 0.04449, "grad_norm": 0.40191804438918904, "learning_rate": 0.003, "loss": 4.1268, "step": 4449 }, { "epoch": 0.0445, "grad_norm": 0.4056694001793259, "learning_rate": 0.003, "loss": 4.1311, "step": 4450 }, { "epoch": 0.04451, "grad_norm": 0.4140029705225631, "learning_rate": 0.003, "loss": 4.1277, "step": 4451 }, { "epoch": 0.04452, "grad_norm": 0.4961606990959985, "learning_rate": 0.003, "loss": 4.1432, "step": 4452 }, { "epoch": 0.04453, "grad_norm": 0.7225243712391776, "learning_rate": 0.003, "loss": 4.1556, "step": 4453 }, { "epoch": 0.04454, "grad_norm": 0.9541612332110946, "learning_rate": 0.003, "loss": 4.1428, "step": 4454 }, { "epoch": 0.04455, "grad_norm": 1.004523212032084, "learning_rate": 0.003, "loss": 4.1579, "step": 4455 }, { "epoch": 0.04456, "grad_norm": 0.7366078757936858, "learning_rate": 0.003, "loss": 4.1199, "step": 4456 }, { "epoch": 0.04457, "grad_norm": 0.7291092880614075, "learning_rate": 0.003, "loss": 4.1359, "step": 4457 }, { "epoch": 0.04458, "grad_norm": 0.8886507308195684, "learning_rate": 0.003, "loss": 4.1658, "step": 4458 }, { "epoch": 0.04459, "grad_norm": 1.023956211330971, "learning_rate": 0.003, "loss": 4.1783, "step": 4459 }, { "epoch": 0.0446, "grad_norm": 0.9783050212355968, "learning_rate": 0.003, "loss": 4.1765, "step": 4460 }, { "epoch": 0.04461, "grad_norm": 0.8910569073793239, "learning_rate": 0.003, "loss": 4.1774, "step": 4461 }, { "epoch": 0.04462, "grad_norm": 1.0114250150322548, "learning_rate": 0.003, "loss": 4.1382, "step": 4462 }, { "epoch": 0.04463, "grad_norm": 1.109635261633442, "learning_rate": 0.003, "loss": 4.159, "step": 4463 }, { "epoch": 0.04464, "grad_norm": 0.9892179336121286, "learning_rate": 0.003, "loss": 4.1722, "step": 4464 }, { "epoch": 0.04465, "grad_norm": 0.9496628250922594, "learning_rate": 0.003, "loss": 4.1619, "step": 4465 }, { "epoch": 0.04466, "grad_norm": 0.9698557150392482, "learning_rate": 0.003, "loss": 4.1735, "step": 4466 }, { "epoch": 0.04467, "grad_norm": 0.9285468685707471, "learning_rate": 0.003, "loss": 4.1725, "step": 4467 }, { "epoch": 0.04468, "grad_norm": 0.9397451172133555, "learning_rate": 0.003, "loss": 4.1519, "step": 4468 }, { "epoch": 0.04469, "grad_norm": 0.9565209269681922, "learning_rate": 0.003, "loss": 4.1882, "step": 4469 }, { "epoch": 0.0447, "grad_norm": 0.8769720683928874, "learning_rate": 0.003, "loss": 4.1844, "step": 4470 }, { "epoch": 0.04471, "grad_norm": 0.795338760730945, "learning_rate": 0.003, "loss": 4.2008, "step": 4471 }, { "epoch": 0.04472, "grad_norm": 0.7843283232673075, "learning_rate": 0.003, "loss": 4.1549, "step": 4472 }, { "epoch": 0.04473, "grad_norm": 0.8909484017306056, "learning_rate": 0.003, "loss": 4.1593, "step": 4473 }, { "epoch": 0.04474, "grad_norm": 0.7784392006819544, "learning_rate": 0.003, "loss": 4.152, "step": 4474 }, { "epoch": 0.04475, "grad_norm": 0.7042700663520809, "learning_rate": 0.003, "loss": 4.1398, "step": 4475 }, { "epoch": 0.04476, "grad_norm": 0.6389974316195782, "learning_rate": 0.003, "loss": 4.1499, "step": 4476 }, { "epoch": 0.04477, "grad_norm": 0.6071760811715112, "learning_rate": 0.003, "loss": 4.1616, "step": 4477 }, { "epoch": 0.04478, "grad_norm": 0.5219307858429795, "learning_rate": 0.003, "loss": 4.1544, "step": 4478 }, { "epoch": 0.04479, "grad_norm": 0.52342819477722, "learning_rate": 0.003, "loss": 4.1329, "step": 4479 }, { "epoch": 0.0448, "grad_norm": 0.5268154790194748, "learning_rate": 0.003, "loss": 4.1425, "step": 4480 }, { "epoch": 0.04481, "grad_norm": 0.5399282796391196, "learning_rate": 0.003, "loss": 4.1341, "step": 4481 }, { "epoch": 0.04482, "grad_norm": 0.6507902172065512, "learning_rate": 0.003, "loss": 4.1572, "step": 4482 }, { "epoch": 0.04483, "grad_norm": 0.867713110982925, "learning_rate": 0.003, "loss": 4.1524, "step": 4483 }, { "epoch": 0.04484, "grad_norm": 0.9801856447775387, "learning_rate": 0.003, "loss": 4.1697, "step": 4484 }, { "epoch": 0.04485, "grad_norm": 0.8358981195735171, "learning_rate": 0.003, "loss": 4.176, "step": 4485 }, { "epoch": 0.04486, "grad_norm": 0.726373516850574, "learning_rate": 0.003, "loss": 4.1324, "step": 4486 }, { "epoch": 0.04487, "grad_norm": 0.7123593368535847, "learning_rate": 0.003, "loss": 4.1419, "step": 4487 }, { "epoch": 0.04488, "grad_norm": 0.726522221164894, "learning_rate": 0.003, "loss": 4.1587, "step": 4488 }, { "epoch": 0.04489, "grad_norm": 0.7095321623068442, "learning_rate": 0.003, "loss": 4.163, "step": 4489 }, { "epoch": 0.0449, "grad_norm": 0.6252082433565278, "learning_rate": 0.003, "loss": 4.1567, "step": 4490 }, { "epoch": 0.04491, "grad_norm": 0.6640250192773183, "learning_rate": 0.003, "loss": 4.1429, "step": 4491 }, { "epoch": 0.04492, "grad_norm": 0.6961055886888501, "learning_rate": 0.003, "loss": 4.1552, "step": 4492 }, { "epoch": 0.04493, "grad_norm": 0.8458020064467313, "learning_rate": 0.003, "loss": 4.1623, "step": 4493 }, { "epoch": 0.04494, "grad_norm": 0.7474034453269607, "learning_rate": 0.003, "loss": 4.1407, "step": 4494 }, { "epoch": 0.04495, "grad_norm": 0.6451994546687867, "learning_rate": 0.003, "loss": 4.1365, "step": 4495 }, { "epoch": 0.04496, "grad_norm": 0.540011005371107, "learning_rate": 0.003, "loss": 4.1741, "step": 4496 }, { "epoch": 0.04497, "grad_norm": 0.4950562236330396, "learning_rate": 0.003, "loss": 4.1537, "step": 4497 }, { "epoch": 0.04498, "grad_norm": 0.4943734925550196, "learning_rate": 0.003, "loss": 4.1237, "step": 4498 }, { "epoch": 0.04499, "grad_norm": 0.47393717260525386, "learning_rate": 0.003, "loss": 4.0968, "step": 4499 }, { "epoch": 0.045, "grad_norm": 0.41885472582226696, "learning_rate": 0.003, "loss": 4.1243, "step": 4500 }, { "epoch": 0.04501, "grad_norm": 0.4834752022098693, "learning_rate": 0.003, "loss": 4.1175, "step": 4501 }, { "epoch": 0.04502, "grad_norm": 0.45694364065485266, "learning_rate": 0.003, "loss": 4.1588, "step": 4502 }, { "epoch": 0.04503, "grad_norm": 0.48865003766237863, "learning_rate": 0.003, "loss": 4.1582, "step": 4503 }, { "epoch": 0.04504, "grad_norm": 0.5438297922402094, "learning_rate": 0.003, "loss": 4.1576, "step": 4504 }, { "epoch": 0.04505, "grad_norm": 0.5871203728299376, "learning_rate": 0.003, "loss": 4.154, "step": 4505 }, { "epoch": 0.04506, "grad_norm": 0.5917294880351803, "learning_rate": 0.003, "loss": 4.1318, "step": 4506 }, { "epoch": 0.04507, "grad_norm": 0.7183532404676503, "learning_rate": 0.003, "loss": 4.1091, "step": 4507 }, { "epoch": 0.04508, "grad_norm": 0.8049591048914756, "learning_rate": 0.003, "loss": 4.1329, "step": 4508 }, { "epoch": 0.04509, "grad_norm": 0.9156955919586133, "learning_rate": 0.003, "loss": 4.1602, "step": 4509 }, { "epoch": 0.0451, "grad_norm": 0.8889056537954558, "learning_rate": 0.003, "loss": 4.1618, "step": 4510 }, { "epoch": 0.04511, "grad_norm": 0.693800418390815, "learning_rate": 0.003, "loss": 4.1419, "step": 4511 }, { "epoch": 0.04512, "grad_norm": 0.7135831824345485, "learning_rate": 0.003, "loss": 4.0971, "step": 4512 }, { "epoch": 0.04513, "grad_norm": 0.6688562196054199, "learning_rate": 0.003, "loss": 4.1268, "step": 4513 }, { "epoch": 0.04514, "grad_norm": 0.6423991582569938, "learning_rate": 0.003, "loss": 4.1345, "step": 4514 }, { "epoch": 0.04515, "grad_norm": 0.7711378950211699, "learning_rate": 0.003, "loss": 4.1345, "step": 4515 }, { "epoch": 0.04516, "grad_norm": 0.8939759348625197, "learning_rate": 0.003, "loss": 4.1535, "step": 4516 }, { "epoch": 0.04517, "grad_norm": 0.7606381004553531, "learning_rate": 0.003, "loss": 4.1665, "step": 4517 }, { "epoch": 0.04518, "grad_norm": 0.6218485906194273, "learning_rate": 0.003, "loss": 4.1359, "step": 4518 }, { "epoch": 0.04519, "grad_norm": 0.7964516800865484, "learning_rate": 0.003, "loss": 4.1353, "step": 4519 }, { "epoch": 0.0452, "grad_norm": 0.8973981028823347, "learning_rate": 0.003, "loss": 4.1488, "step": 4520 }, { "epoch": 0.04521, "grad_norm": 0.807736716714071, "learning_rate": 0.003, "loss": 4.1345, "step": 4521 }, { "epoch": 0.04522, "grad_norm": 0.8117844738071615, "learning_rate": 0.003, "loss": 4.1378, "step": 4522 }, { "epoch": 0.04523, "grad_norm": 0.7829719745475495, "learning_rate": 0.003, "loss": 4.1416, "step": 4523 }, { "epoch": 0.04524, "grad_norm": 0.7479885268590871, "learning_rate": 0.003, "loss": 4.1194, "step": 4524 }, { "epoch": 0.04525, "grad_norm": 0.7243373752912751, "learning_rate": 0.003, "loss": 4.127, "step": 4525 }, { "epoch": 0.04526, "grad_norm": 0.7384870219800298, "learning_rate": 0.003, "loss": 4.157, "step": 4526 }, { "epoch": 0.04527, "grad_norm": 0.8786326776352553, "learning_rate": 0.003, "loss": 4.1433, "step": 4527 }, { "epoch": 0.04528, "grad_norm": 0.9239987067801139, "learning_rate": 0.003, "loss": 4.1245, "step": 4528 }, { "epoch": 0.04529, "grad_norm": 0.8389910760544824, "learning_rate": 0.003, "loss": 4.1271, "step": 4529 }, { "epoch": 0.0453, "grad_norm": 0.748168700266448, "learning_rate": 0.003, "loss": 4.1231, "step": 4530 }, { "epoch": 0.04531, "grad_norm": 0.7202788367913475, "learning_rate": 0.003, "loss": 4.1225, "step": 4531 }, { "epoch": 0.04532, "grad_norm": 0.6571475637254836, "learning_rate": 0.003, "loss": 4.1461, "step": 4532 }, { "epoch": 0.04533, "grad_norm": 0.7185379271005126, "learning_rate": 0.003, "loss": 4.1112, "step": 4533 }, { "epoch": 0.04534, "grad_norm": 0.6309360237693905, "learning_rate": 0.003, "loss": 4.1522, "step": 4534 }, { "epoch": 0.04535, "grad_norm": 0.6779837587183748, "learning_rate": 0.003, "loss": 4.1737, "step": 4535 }, { "epoch": 0.04536, "grad_norm": 0.6922814218735922, "learning_rate": 0.003, "loss": 4.1687, "step": 4536 }, { "epoch": 0.04537, "grad_norm": 0.6707461416618695, "learning_rate": 0.003, "loss": 4.1466, "step": 4537 }, { "epoch": 0.04538, "grad_norm": 0.6533648131571776, "learning_rate": 0.003, "loss": 4.1468, "step": 4538 }, { "epoch": 0.04539, "grad_norm": 0.7226394008940196, "learning_rate": 0.003, "loss": 4.1106, "step": 4539 }, { "epoch": 0.0454, "grad_norm": 0.7809591559130085, "learning_rate": 0.003, "loss": 4.1225, "step": 4540 }, { "epoch": 0.04541, "grad_norm": 0.8598716052287259, "learning_rate": 0.003, "loss": 4.1744, "step": 4541 }, { "epoch": 0.04542, "grad_norm": 0.7887863556115788, "learning_rate": 0.003, "loss": 4.1194, "step": 4542 }, { "epoch": 0.04543, "grad_norm": 0.5682149072319508, "learning_rate": 0.003, "loss": 4.146, "step": 4543 }, { "epoch": 0.04544, "grad_norm": 0.5505936637479015, "learning_rate": 0.003, "loss": 4.1237, "step": 4544 }, { "epoch": 0.04545, "grad_norm": 0.6999578034297054, "learning_rate": 0.003, "loss": 4.1378, "step": 4545 }, { "epoch": 0.04546, "grad_norm": 0.6684727088847572, "learning_rate": 0.003, "loss": 4.1187, "step": 4546 }, { "epoch": 0.04547, "grad_norm": 0.6131696984235054, "learning_rate": 0.003, "loss": 4.1242, "step": 4547 }, { "epoch": 0.04548, "grad_norm": 0.6233401797280433, "learning_rate": 0.003, "loss": 4.1371, "step": 4548 }, { "epoch": 0.04549, "grad_norm": 0.7411565175033444, "learning_rate": 0.003, "loss": 4.1303, "step": 4549 }, { "epoch": 0.0455, "grad_norm": 0.7508874744904848, "learning_rate": 0.003, "loss": 4.1646, "step": 4550 }, { "epoch": 0.04551, "grad_norm": 0.7534956856948848, "learning_rate": 0.003, "loss": 4.1738, "step": 4551 }, { "epoch": 0.04552, "grad_norm": 0.689574501986947, "learning_rate": 0.003, "loss": 4.1544, "step": 4552 }, { "epoch": 0.04553, "grad_norm": 0.6536678327481642, "learning_rate": 0.003, "loss": 4.1419, "step": 4553 }, { "epoch": 0.04554, "grad_norm": 0.6521633276844322, "learning_rate": 0.003, "loss": 4.1285, "step": 4554 }, { "epoch": 0.04555, "grad_norm": 0.7438153849355749, "learning_rate": 0.003, "loss": 4.1448, "step": 4555 }, { "epoch": 0.04556, "grad_norm": 0.813199349395972, "learning_rate": 0.003, "loss": 4.1481, "step": 4556 }, { "epoch": 0.04557, "grad_norm": 0.8670448776825898, "learning_rate": 0.003, "loss": 4.1339, "step": 4557 }, { "epoch": 0.04558, "grad_norm": 0.9173442370434104, "learning_rate": 0.003, "loss": 4.165, "step": 4558 }, { "epoch": 0.04559, "grad_norm": 0.9293838780402448, "learning_rate": 0.003, "loss": 4.1785, "step": 4559 }, { "epoch": 0.0456, "grad_norm": 0.9721860045634062, "learning_rate": 0.003, "loss": 4.142, "step": 4560 }, { "epoch": 0.04561, "grad_norm": 0.9130374577828355, "learning_rate": 0.003, "loss": 4.1572, "step": 4561 }, { "epoch": 0.04562, "grad_norm": 0.8204531300346364, "learning_rate": 0.003, "loss": 4.1735, "step": 4562 }, { "epoch": 0.04563, "grad_norm": 0.941182990714371, "learning_rate": 0.003, "loss": 4.145, "step": 4563 }, { "epoch": 0.04564, "grad_norm": 0.9950132484736411, "learning_rate": 0.003, "loss": 4.1607, "step": 4564 }, { "epoch": 0.04565, "grad_norm": 0.8713983224408589, "learning_rate": 0.003, "loss": 4.1308, "step": 4565 }, { "epoch": 0.04566, "grad_norm": 0.7742242108390238, "learning_rate": 0.003, "loss": 4.158, "step": 4566 }, { "epoch": 0.04567, "grad_norm": 0.7732353202308767, "learning_rate": 0.003, "loss": 4.1557, "step": 4567 }, { "epoch": 0.04568, "grad_norm": 0.7949276600918046, "learning_rate": 0.003, "loss": 4.1324, "step": 4568 }, { "epoch": 0.04569, "grad_norm": 0.76216115917809, "learning_rate": 0.003, "loss": 4.1586, "step": 4569 }, { "epoch": 0.0457, "grad_norm": 0.665803751777818, "learning_rate": 0.003, "loss": 4.1443, "step": 4570 }, { "epoch": 0.04571, "grad_norm": 0.61898217206735, "learning_rate": 0.003, "loss": 4.1709, "step": 4571 }, { "epoch": 0.04572, "grad_norm": 0.6630102965712841, "learning_rate": 0.003, "loss": 4.1438, "step": 4572 }, { "epoch": 0.04573, "grad_norm": 0.6545388007004492, "learning_rate": 0.003, "loss": 4.1147, "step": 4573 }, { "epoch": 0.04574, "grad_norm": 0.6434199338973368, "learning_rate": 0.003, "loss": 4.1269, "step": 4574 }, { "epoch": 0.04575, "grad_norm": 0.6084613496329172, "learning_rate": 0.003, "loss": 4.1507, "step": 4575 }, { "epoch": 0.04576, "grad_norm": 0.5926985992517696, "learning_rate": 0.003, "loss": 4.1399, "step": 4576 }, { "epoch": 0.04577, "grad_norm": 0.7068872849239232, "learning_rate": 0.003, "loss": 4.161, "step": 4577 }, { "epoch": 0.04578, "grad_norm": 0.7815668152529673, "learning_rate": 0.003, "loss": 4.1309, "step": 4578 }, { "epoch": 0.04579, "grad_norm": 0.705130762977042, "learning_rate": 0.003, "loss": 4.1148, "step": 4579 }, { "epoch": 0.0458, "grad_norm": 0.706838854182526, "learning_rate": 0.003, "loss": 4.1481, "step": 4580 }, { "epoch": 0.04581, "grad_norm": 0.7719932048413563, "learning_rate": 0.003, "loss": 4.1624, "step": 4581 }, { "epoch": 0.04582, "grad_norm": 0.8196871294195923, "learning_rate": 0.003, "loss": 4.1644, "step": 4582 }, { "epoch": 0.04583, "grad_norm": 0.9747741235878243, "learning_rate": 0.003, "loss": 4.1405, "step": 4583 }, { "epoch": 0.04584, "grad_norm": 1.0614405255220143, "learning_rate": 0.003, "loss": 4.148, "step": 4584 }, { "epoch": 0.04585, "grad_norm": 0.8692609981568404, "learning_rate": 0.003, "loss": 4.1458, "step": 4585 }, { "epoch": 0.04586, "grad_norm": 0.8017360688038584, "learning_rate": 0.003, "loss": 4.1494, "step": 4586 }, { "epoch": 0.04587, "grad_norm": 0.737110938579538, "learning_rate": 0.003, "loss": 4.1647, "step": 4587 }, { "epoch": 0.04588, "grad_norm": 0.6638294236901521, "learning_rate": 0.003, "loss": 4.1602, "step": 4588 }, { "epoch": 0.04589, "grad_norm": 0.6411690157984203, "learning_rate": 0.003, "loss": 4.1251, "step": 4589 }, { "epoch": 0.0459, "grad_norm": 0.6088730568376585, "learning_rate": 0.003, "loss": 4.1499, "step": 4590 }, { "epoch": 0.04591, "grad_norm": 0.6362826036737852, "learning_rate": 0.003, "loss": 4.1154, "step": 4591 }, { "epoch": 0.04592, "grad_norm": 0.6389570140066912, "learning_rate": 0.003, "loss": 4.1392, "step": 4592 }, { "epoch": 0.04593, "grad_norm": 0.7097656474131322, "learning_rate": 0.003, "loss": 4.1641, "step": 4593 }, { "epoch": 0.04594, "grad_norm": 0.6175568409115185, "learning_rate": 0.003, "loss": 4.1529, "step": 4594 }, { "epoch": 0.04595, "grad_norm": 0.620699543326453, "learning_rate": 0.003, "loss": 4.1404, "step": 4595 }, { "epoch": 0.04596, "grad_norm": 0.5962342876114253, "learning_rate": 0.003, "loss": 4.1429, "step": 4596 }, { "epoch": 0.04597, "grad_norm": 0.6324593221701938, "learning_rate": 0.003, "loss": 4.1494, "step": 4597 }, { "epoch": 0.04598, "grad_norm": 0.6611397602971393, "learning_rate": 0.003, "loss": 4.1806, "step": 4598 }, { "epoch": 0.04599, "grad_norm": 0.6234361489034259, "learning_rate": 0.003, "loss": 4.1279, "step": 4599 }, { "epoch": 0.046, "grad_norm": 0.6075516815675354, "learning_rate": 0.003, "loss": 4.1403, "step": 4600 }, { "epoch": 0.04601, "grad_norm": 0.6042104496947458, "learning_rate": 0.003, "loss": 4.1434, "step": 4601 }, { "epoch": 0.04602, "grad_norm": 0.6164999421939368, "learning_rate": 0.003, "loss": 4.1389, "step": 4602 }, { "epoch": 0.04603, "grad_norm": 0.6490308990358886, "learning_rate": 0.003, "loss": 4.1506, "step": 4603 }, { "epoch": 0.04604, "grad_norm": 0.7661434840060077, "learning_rate": 0.003, "loss": 4.1474, "step": 4604 }, { "epoch": 0.04605, "grad_norm": 1.0569916498623362, "learning_rate": 0.003, "loss": 4.1659, "step": 4605 }, { "epoch": 0.04606, "grad_norm": 1.027715504643133, "learning_rate": 0.003, "loss": 4.1333, "step": 4606 }, { "epoch": 0.04607, "grad_norm": 0.8342746434973768, "learning_rate": 0.003, "loss": 4.1373, "step": 4607 }, { "epoch": 0.04608, "grad_norm": 0.7850330384795643, "learning_rate": 0.003, "loss": 4.121, "step": 4608 }, { "epoch": 0.04609, "grad_norm": 0.7443196821550939, "learning_rate": 0.003, "loss": 4.1433, "step": 4609 }, { "epoch": 0.0461, "grad_norm": 0.7785237638327334, "learning_rate": 0.003, "loss": 4.1237, "step": 4610 }, { "epoch": 0.04611, "grad_norm": 0.8838166156331966, "learning_rate": 0.003, "loss": 4.1784, "step": 4611 }, { "epoch": 0.04612, "grad_norm": 0.8874166663110993, "learning_rate": 0.003, "loss": 4.1625, "step": 4612 }, { "epoch": 0.04613, "grad_norm": 0.8148264887171576, "learning_rate": 0.003, "loss": 4.1424, "step": 4613 }, { "epoch": 0.04614, "grad_norm": 0.7820861967247282, "learning_rate": 0.003, "loss": 4.1074, "step": 4614 }, { "epoch": 0.04615, "grad_norm": 0.7280484691047371, "learning_rate": 0.003, "loss": 4.1473, "step": 4615 }, { "epoch": 0.04616, "grad_norm": 0.7490023925524317, "learning_rate": 0.003, "loss": 4.1301, "step": 4616 }, { "epoch": 0.04617, "grad_norm": 0.7518558085096564, "learning_rate": 0.003, "loss": 4.1549, "step": 4617 }, { "epoch": 0.04618, "grad_norm": 0.625986793466225, "learning_rate": 0.003, "loss": 4.134, "step": 4618 }, { "epoch": 0.04619, "grad_norm": 0.6472629361360867, "learning_rate": 0.003, "loss": 4.1346, "step": 4619 }, { "epoch": 0.0462, "grad_norm": 0.6557801977850327, "learning_rate": 0.003, "loss": 4.1625, "step": 4620 }, { "epoch": 0.04621, "grad_norm": 0.73229721178544, "learning_rate": 0.003, "loss": 4.1693, "step": 4621 }, { "epoch": 0.04622, "grad_norm": 0.9280201932819085, "learning_rate": 0.003, "loss": 4.1688, "step": 4622 }, { "epoch": 0.04623, "grad_norm": 1.042833156417488, "learning_rate": 0.003, "loss": 4.1581, "step": 4623 }, { "epoch": 0.04624, "grad_norm": 0.8256463072634586, "learning_rate": 0.003, "loss": 4.1292, "step": 4624 }, { "epoch": 0.04625, "grad_norm": 0.7149715088227584, "learning_rate": 0.003, "loss": 4.1471, "step": 4625 }, { "epoch": 0.04626, "grad_norm": 0.6877823576969472, "learning_rate": 0.003, "loss": 4.1522, "step": 4626 }, { "epoch": 0.04627, "grad_norm": 0.6742469710012651, "learning_rate": 0.003, "loss": 4.1327, "step": 4627 }, { "epoch": 0.04628, "grad_norm": 0.62532013220358, "learning_rate": 0.003, "loss": 4.1618, "step": 4628 }, { "epoch": 0.04629, "grad_norm": 0.5130605726615276, "learning_rate": 0.003, "loss": 4.1316, "step": 4629 }, { "epoch": 0.0463, "grad_norm": 0.5654280379865769, "learning_rate": 0.003, "loss": 4.1481, "step": 4630 }, { "epoch": 0.04631, "grad_norm": 0.6014781265202847, "learning_rate": 0.003, "loss": 4.1292, "step": 4631 }, { "epoch": 0.04632, "grad_norm": 0.6307983761305929, "learning_rate": 0.003, "loss": 4.1198, "step": 4632 }, { "epoch": 0.04633, "grad_norm": 0.6189113146917462, "learning_rate": 0.003, "loss": 4.1255, "step": 4633 }, { "epoch": 0.04634, "grad_norm": 0.6421436906797112, "learning_rate": 0.003, "loss": 4.1275, "step": 4634 }, { "epoch": 0.04635, "grad_norm": 0.5590583262211648, "learning_rate": 0.003, "loss": 4.1297, "step": 4635 }, { "epoch": 0.04636, "grad_norm": 0.503140076293387, "learning_rate": 0.003, "loss": 4.1643, "step": 4636 }, { "epoch": 0.04637, "grad_norm": 0.5299879986072056, "learning_rate": 0.003, "loss": 4.1066, "step": 4637 }, { "epoch": 0.04638, "grad_norm": 0.4214634549110216, "learning_rate": 0.003, "loss": 4.1395, "step": 4638 }, { "epoch": 0.04639, "grad_norm": 0.397393509216712, "learning_rate": 0.003, "loss": 4.1094, "step": 4639 }, { "epoch": 0.0464, "grad_norm": 0.43447214142466917, "learning_rate": 0.003, "loss": 4.134, "step": 4640 }, { "epoch": 0.04641, "grad_norm": 0.5062252032170508, "learning_rate": 0.003, "loss": 4.1499, "step": 4641 }, { "epoch": 0.04642, "grad_norm": 0.5711310545610656, "learning_rate": 0.003, "loss": 4.0962, "step": 4642 }, { "epoch": 0.04643, "grad_norm": 0.6248454986616232, "learning_rate": 0.003, "loss": 4.1224, "step": 4643 }, { "epoch": 0.04644, "grad_norm": 0.7210053036878883, "learning_rate": 0.003, "loss": 4.1317, "step": 4644 }, { "epoch": 0.04645, "grad_norm": 0.90249716060301, "learning_rate": 0.003, "loss": 4.1299, "step": 4645 }, { "epoch": 0.04646, "grad_norm": 1.2409050533897181, "learning_rate": 0.003, "loss": 4.1424, "step": 4646 }, { "epoch": 0.04647, "grad_norm": 0.8393901607525605, "learning_rate": 0.003, "loss": 4.1312, "step": 4647 }, { "epoch": 0.04648, "grad_norm": 0.7746500580810028, "learning_rate": 0.003, "loss": 4.1499, "step": 4648 }, { "epoch": 0.04649, "grad_norm": 0.8739807831426791, "learning_rate": 0.003, "loss": 4.1372, "step": 4649 }, { "epoch": 0.0465, "grad_norm": 0.9522286444793494, "learning_rate": 0.003, "loss": 4.1127, "step": 4650 }, { "epoch": 0.04651, "grad_norm": 1.1296690683804316, "learning_rate": 0.003, "loss": 4.1538, "step": 4651 }, { "epoch": 0.04652, "grad_norm": 0.9265569184990113, "learning_rate": 0.003, "loss": 4.1575, "step": 4652 }, { "epoch": 0.04653, "grad_norm": 0.900473448775536, "learning_rate": 0.003, "loss": 4.1588, "step": 4653 }, { "epoch": 0.04654, "grad_norm": 1.0602801619166438, "learning_rate": 0.003, "loss": 4.1627, "step": 4654 }, { "epoch": 0.04655, "grad_norm": 0.9413459563751835, "learning_rate": 0.003, "loss": 4.1634, "step": 4655 }, { "epoch": 0.04656, "grad_norm": 0.7433440151400471, "learning_rate": 0.003, "loss": 4.135, "step": 4656 }, { "epoch": 0.04657, "grad_norm": 0.7855838722575367, "learning_rate": 0.003, "loss": 4.1731, "step": 4657 }, { "epoch": 0.04658, "grad_norm": 0.7026074720403149, "learning_rate": 0.003, "loss": 4.1442, "step": 4658 }, { "epoch": 0.04659, "grad_norm": 0.7054872170953301, "learning_rate": 0.003, "loss": 4.1422, "step": 4659 }, { "epoch": 0.0466, "grad_norm": 0.7253225489512056, "learning_rate": 0.003, "loss": 4.162, "step": 4660 }, { "epoch": 0.04661, "grad_norm": 0.6938113299882246, "learning_rate": 0.003, "loss": 4.1462, "step": 4661 }, { "epoch": 0.04662, "grad_norm": 0.6661030601372132, "learning_rate": 0.003, "loss": 4.1305, "step": 4662 }, { "epoch": 0.04663, "grad_norm": 0.7495766554410687, "learning_rate": 0.003, "loss": 4.152, "step": 4663 }, { "epoch": 0.04664, "grad_norm": 0.7803399105366602, "learning_rate": 0.003, "loss": 4.129, "step": 4664 }, { "epoch": 0.04665, "grad_norm": 0.7715228893902641, "learning_rate": 0.003, "loss": 4.1305, "step": 4665 }, { "epoch": 0.04666, "grad_norm": 0.8189022468897561, "learning_rate": 0.003, "loss": 4.1281, "step": 4666 }, { "epoch": 0.04667, "grad_norm": 0.9073148500434436, "learning_rate": 0.003, "loss": 4.1352, "step": 4667 }, { "epoch": 0.04668, "grad_norm": 0.8663369305932774, "learning_rate": 0.003, "loss": 4.1091, "step": 4668 }, { "epoch": 0.04669, "grad_norm": 0.7981760342295242, "learning_rate": 0.003, "loss": 4.1642, "step": 4669 }, { "epoch": 0.0467, "grad_norm": 0.7399100042776996, "learning_rate": 0.003, "loss": 4.1113, "step": 4670 }, { "epoch": 0.04671, "grad_norm": 0.79400390511152, "learning_rate": 0.003, "loss": 4.1397, "step": 4671 }, { "epoch": 0.04672, "grad_norm": 0.7886276399898601, "learning_rate": 0.003, "loss": 4.1526, "step": 4672 }, { "epoch": 0.04673, "grad_norm": 0.7587981117840258, "learning_rate": 0.003, "loss": 4.1559, "step": 4673 }, { "epoch": 0.04674, "grad_norm": 0.6833849503950475, "learning_rate": 0.003, "loss": 4.1246, "step": 4674 }, { "epoch": 0.04675, "grad_norm": 0.5521830886006064, "learning_rate": 0.003, "loss": 4.1453, "step": 4675 }, { "epoch": 0.04676, "grad_norm": 0.5551519809403406, "learning_rate": 0.003, "loss": 4.1502, "step": 4676 }, { "epoch": 0.04677, "grad_norm": 0.4956612676914781, "learning_rate": 0.003, "loss": 4.1512, "step": 4677 }, { "epoch": 0.04678, "grad_norm": 0.5248385967176291, "learning_rate": 0.003, "loss": 4.1077, "step": 4678 }, { "epoch": 0.04679, "grad_norm": 0.5169108616408382, "learning_rate": 0.003, "loss": 4.1375, "step": 4679 }, { "epoch": 0.0468, "grad_norm": 0.7044973029382834, "learning_rate": 0.003, "loss": 4.111, "step": 4680 }, { "epoch": 0.04681, "grad_norm": 0.845934209130146, "learning_rate": 0.003, "loss": 4.1661, "step": 4681 }, { "epoch": 0.04682, "grad_norm": 0.8277130051517226, "learning_rate": 0.003, "loss": 4.1349, "step": 4682 }, { "epoch": 0.04683, "grad_norm": 0.7576471069505033, "learning_rate": 0.003, "loss": 4.1267, "step": 4683 }, { "epoch": 0.04684, "grad_norm": 0.6931368300408345, "learning_rate": 0.003, "loss": 4.1455, "step": 4684 }, { "epoch": 0.04685, "grad_norm": 0.6709891251557847, "learning_rate": 0.003, "loss": 4.1292, "step": 4685 }, { "epoch": 0.04686, "grad_norm": 0.5581781513115139, "learning_rate": 0.003, "loss": 4.1336, "step": 4686 }, { "epoch": 0.04687, "grad_norm": 0.6261887980349435, "learning_rate": 0.003, "loss": 4.1643, "step": 4687 }, { "epoch": 0.04688, "grad_norm": 0.7985962391701531, "learning_rate": 0.003, "loss": 4.1465, "step": 4688 }, { "epoch": 0.04689, "grad_norm": 1.0550803093277015, "learning_rate": 0.003, "loss": 4.1722, "step": 4689 }, { "epoch": 0.0469, "grad_norm": 0.9618900206251452, "learning_rate": 0.003, "loss": 4.1247, "step": 4690 }, { "epoch": 0.04691, "grad_norm": 0.6523829450195608, "learning_rate": 0.003, "loss": 4.1317, "step": 4691 }, { "epoch": 0.04692, "grad_norm": 0.6896621979107347, "learning_rate": 0.003, "loss": 4.1375, "step": 4692 }, { "epoch": 0.04693, "grad_norm": 0.9401038472008131, "learning_rate": 0.003, "loss": 4.1706, "step": 4693 }, { "epoch": 0.04694, "grad_norm": 0.9720475262714349, "learning_rate": 0.003, "loss": 4.1363, "step": 4694 }, { "epoch": 0.04695, "grad_norm": 0.9719251158591558, "learning_rate": 0.003, "loss": 4.1313, "step": 4695 }, { "epoch": 0.04696, "grad_norm": 0.8427853123728982, "learning_rate": 0.003, "loss": 4.1711, "step": 4696 }, { "epoch": 0.04697, "grad_norm": 0.8499034656864161, "learning_rate": 0.003, "loss": 4.1544, "step": 4697 }, { "epoch": 0.04698, "grad_norm": 0.7021228867021835, "learning_rate": 0.003, "loss": 4.1502, "step": 4698 }, { "epoch": 0.04699, "grad_norm": 0.7398294832914439, "learning_rate": 0.003, "loss": 4.144, "step": 4699 }, { "epoch": 0.047, "grad_norm": 0.6942643738589905, "learning_rate": 0.003, "loss": 4.1544, "step": 4700 }, { "epoch": 0.04701, "grad_norm": 0.6683525277935326, "learning_rate": 0.003, "loss": 4.1603, "step": 4701 }, { "epoch": 0.04702, "grad_norm": 0.5837305852992254, "learning_rate": 0.003, "loss": 4.151, "step": 4702 }, { "epoch": 0.04703, "grad_norm": 0.5004181371776402, "learning_rate": 0.003, "loss": 4.1344, "step": 4703 }, { "epoch": 0.04704, "grad_norm": 0.500112229497055, "learning_rate": 0.003, "loss": 4.1276, "step": 4704 }, { "epoch": 0.04705, "grad_norm": 0.46870504628905124, "learning_rate": 0.003, "loss": 4.1571, "step": 4705 }, { "epoch": 0.04706, "grad_norm": 0.5084124708467841, "learning_rate": 0.003, "loss": 4.0994, "step": 4706 }, { "epoch": 0.04707, "grad_norm": 0.5592830079070097, "learning_rate": 0.003, "loss": 4.1412, "step": 4707 }, { "epoch": 0.04708, "grad_norm": 0.6734818421768685, "learning_rate": 0.003, "loss": 4.0984, "step": 4708 }, { "epoch": 0.04709, "grad_norm": 0.7820705785181057, "learning_rate": 0.003, "loss": 4.1325, "step": 4709 }, { "epoch": 0.0471, "grad_norm": 0.8173042330246184, "learning_rate": 0.003, "loss": 4.1472, "step": 4710 }, { "epoch": 0.04711, "grad_norm": 0.7323489999342726, "learning_rate": 0.003, "loss": 4.1382, "step": 4711 }, { "epoch": 0.04712, "grad_norm": 0.6858752455645303, "learning_rate": 0.003, "loss": 4.1261, "step": 4712 }, { "epoch": 0.04713, "grad_norm": 0.5541474886267923, "learning_rate": 0.003, "loss": 4.1189, "step": 4713 }, { "epoch": 0.04714, "grad_norm": 0.5810454942772322, "learning_rate": 0.003, "loss": 4.1281, "step": 4714 }, { "epoch": 0.04715, "grad_norm": 0.6021618978261117, "learning_rate": 0.003, "loss": 4.1564, "step": 4715 }, { "epoch": 0.04716, "grad_norm": 0.6343272321037665, "learning_rate": 0.003, "loss": 4.1271, "step": 4716 }, { "epoch": 0.04717, "grad_norm": 0.7013052653860089, "learning_rate": 0.003, "loss": 4.1476, "step": 4717 }, { "epoch": 0.04718, "grad_norm": 0.66349485254104, "learning_rate": 0.003, "loss": 4.1109, "step": 4718 }, { "epoch": 0.04719, "grad_norm": 0.8713918643231059, "learning_rate": 0.003, "loss": 4.127, "step": 4719 }, { "epoch": 0.0472, "grad_norm": 1.0008190365871503, "learning_rate": 0.003, "loss": 4.1663, "step": 4720 }, { "epoch": 0.04721, "grad_norm": 0.9716425512049767, "learning_rate": 0.003, "loss": 4.1264, "step": 4721 }, { "epoch": 0.04722, "grad_norm": 1.1120781580345849, "learning_rate": 0.003, "loss": 4.1498, "step": 4722 }, { "epoch": 0.04723, "grad_norm": 0.8161047263055594, "learning_rate": 0.003, "loss": 4.1529, "step": 4723 }, { "epoch": 0.04724, "grad_norm": 0.8833523326572259, "learning_rate": 0.003, "loss": 4.1324, "step": 4724 }, { "epoch": 0.04725, "grad_norm": 0.8898592287073411, "learning_rate": 0.003, "loss": 4.1569, "step": 4725 }, { "epoch": 0.04726, "grad_norm": 0.9859327457193534, "learning_rate": 0.003, "loss": 4.1487, "step": 4726 }, { "epoch": 0.04727, "grad_norm": 0.9165866060948171, "learning_rate": 0.003, "loss": 4.1321, "step": 4727 }, { "epoch": 0.04728, "grad_norm": 0.9068684412672875, "learning_rate": 0.003, "loss": 4.1421, "step": 4728 }, { "epoch": 0.04729, "grad_norm": 0.8318177657045663, "learning_rate": 0.003, "loss": 4.1603, "step": 4729 }, { "epoch": 0.0473, "grad_norm": 0.8027800490874897, "learning_rate": 0.003, "loss": 4.1585, "step": 4730 }, { "epoch": 0.04731, "grad_norm": 0.7978896274361092, "learning_rate": 0.003, "loss": 4.1507, "step": 4731 }, { "epoch": 0.04732, "grad_norm": 0.8802600387002112, "learning_rate": 0.003, "loss": 4.164, "step": 4732 }, { "epoch": 0.04733, "grad_norm": 0.8234843622598483, "learning_rate": 0.003, "loss": 4.148, "step": 4733 }, { "epoch": 0.04734, "grad_norm": 0.6599234947722757, "learning_rate": 0.003, "loss": 4.1501, "step": 4734 }, { "epoch": 0.04735, "grad_norm": 0.8208592352781166, "learning_rate": 0.003, "loss": 4.1681, "step": 4735 }, { "epoch": 0.04736, "grad_norm": 1.1729409563655009, "learning_rate": 0.003, "loss": 4.1639, "step": 4736 }, { "epoch": 0.04737, "grad_norm": 0.9659463338122776, "learning_rate": 0.003, "loss": 4.1274, "step": 4737 }, { "epoch": 0.04738, "grad_norm": 0.7651058866068673, "learning_rate": 0.003, "loss": 4.1646, "step": 4738 }, { "epoch": 0.04739, "grad_norm": 0.717869067195337, "learning_rate": 0.003, "loss": 4.0994, "step": 4739 }, { "epoch": 0.0474, "grad_norm": 0.6851887051422195, "learning_rate": 0.003, "loss": 4.1663, "step": 4740 }, { "epoch": 0.04741, "grad_norm": 0.6717713840804643, "learning_rate": 0.003, "loss": 4.1559, "step": 4741 }, { "epoch": 0.04742, "grad_norm": 0.6107738091433065, "learning_rate": 0.003, "loss": 4.1372, "step": 4742 }, { "epoch": 0.04743, "grad_norm": 0.5441883526059924, "learning_rate": 0.003, "loss": 4.1554, "step": 4743 }, { "epoch": 0.04744, "grad_norm": 0.5198168694209138, "learning_rate": 0.003, "loss": 4.1156, "step": 4744 }, { "epoch": 0.04745, "grad_norm": 0.43524115400010815, "learning_rate": 0.003, "loss": 4.117, "step": 4745 }, { "epoch": 0.04746, "grad_norm": 0.48653308583746263, "learning_rate": 0.003, "loss": 4.1322, "step": 4746 }, { "epoch": 0.04747, "grad_norm": 0.5339120307603674, "learning_rate": 0.003, "loss": 4.1062, "step": 4747 }, { "epoch": 0.04748, "grad_norm": 0.5479071965461091, "learning_rate": 0.003, "loss": 4.1554, "step": 4748 }, { "epoch": 0.04749, "grad_norm": 0.5417055693516375, "learning_rate": 0.003, "loss": 4.1617, "step": 4749 }, { "epoch": 0.0475, "grad_norm": 0.5280951958997921, "learning_rate": 0.003, "loss": 4.1257, "step": 4750 }, { "epoch": 0.04751, "grad_norm": 0.5350329833617672, "learning_rate": 0.003, "loss": 4.0892, "step": 4751 }, { "epoch": 0.04752, "grad_norm": 0.57851596805048, "learning_rate": 0.003, "loss": 4.1221, "step": 4752 }, { "epoch": 0.04753, "grad_norm": 0.72902319198213, "learning_rate": 0.003, "loss": 4.1305, "step": 4753 }, { "epoch": 0.04754, "grad_norm": 0.871184049188565, "learning_rate": 0.003, "loss": 4.1544, "step": 4754 }, { "epoch": 0.04755, "grad_norm": 0.9103037208186765, "learning_rate": 0.003, "loss": 4.1375, "step": 4755 }, { "epoch": 0.04756, "grad_norm": 0.859550535967343, "learning_rate": 0.003, "loss": 4.1417, "step": 4756 }, { "epoch": 0.04757, "grad_norm": 0.7466205077193813, "learning_rate": 0.003, "loss": 4.1385, "step": 4757 }, { "epoch": 0.04758, "grad_norm": 0.7507245421944043, "learning_rate": 0.003, "loss": 4.1287, "step": 4758 }, { "epoch": 0.04759, "grad_norm": 0.8294869880303999, "learning_rate": 0.003, "loss": 4.1452, "step": 4759 }, { "epoch": 0.0476, "grad_norm": 0.8251883621259142, "learning_rate": 0.003, "loss": 4.1243, "step": 4760 }, { "epoch": 0.04761, "grad_norm": 0.7765732752545424, "learning_rate": 0.003, "loss": 4.1329, "step": 4761 }, { "epoch": 0.04762, "grad_norm": 0.6280062378308506, "learning_rate": 0.003, "loss": 4.1228, "step": 4762 }, { "epoch": 0.04763, "grad_norm": 0.5686058703500096, "learning_rate": 0.003, "loss": 4.1247, "step": 4763 }, { "epoch": 0.04764, "grad_norm": 0.46584482494791357, "learning_rate": 0.003, "loss": 4.1095, "step": 4764 }, { "epoch": 0.04765, "grad_norm": 0.45316919798397814, "learning_rate": 0.003, "loss": 4.1591, "step": 4765 }, { "epoch": 0.04766, "grad_norm": 0.5554760863440187, "learning_rate": 0.003, "loss": 4.1327, "step": 4766 }, { "epoch": 0.04767, "grad_norm": 0.6418170833104156, "learning_rate": 0.003, "loss": 4.1325, "step": 4767 }, { "epoch": 0.04768, "grad_norm": 0.5645471402502725, "learning_rate": 0.003, "loss": 4.1237, "step": 4768 }, { "epoch": 0.04769, "grad_norm": 0.5927099803110258, "learning_rate": 0.003, "loss": 4.0909, "step": 4769 }, { "epoch": 0.0477, "grad_norm": 0.6595637919437056, "learning_rate": 0.003, "loss": 4.1055, "step": 4770 }, { "epoch": 0.04771, "grad_norm": 0.5520312176680346, "learning_rate": 0.003, "loss": 4.1292, "step": 4771 }, { "epoch": 0.04772, "grad_norm": 0.5616366446629109, "learning_rate": 0.003, "loss": 4.1263, "step": 4772 }, { "epoch": 0.04773, "grad_norm": 0.7069869424163182, "learning_rate": 0.003, "loss": 4.1512, "step": 4773 }, { "epoch": 0.04774, "grad_norm": 0.8943049067297537, "learning_rate": 0.003, "loss": 4.1426, "step": 4774 }, { "epoch": 0.04775, "grad_norm": 1.0379517887129035, "learning_rate": 0.003, "loss": 4.1448, "step": 4775 }, { "epoch": 0.04776, "grad_norm": 1.0601938367245354, "learning_rate": 0.003, "loss": 4.1369, "step": 4776 }, { "epoch": 0.04777, "grad_norm": 0.8123830997039955, "learning_rate": 0.003, "loss": 4.13, "step": 4777 }, { "epoch": 0.04778, "grad_norm": 0.6466306116567022, "learning_rate": 0.003, "loss": 4.1035, "step": 4778 }, { "epoch": 0.04779, "grad_norm": 0.8059965327611258, "learning_rate": 0.003, "loss": 4.1478, "step": 4779 }, { "epoch": 0.0478, "grad_norm": 1.005661350204286, "learning_rate": 0.003, "loss": 4.1578, "step": 4780 }, { "epoch": 0.04781, "grad_norm": 0.9783065445314736, "learning_rate": 0.003, "loss": 4.1382, "step": 4781 }, { "epoch": 0.04782, "grad_norm": 0.9777333193008619, "learning_rate": 0.003, "loss": 4.1427, "step": 4782 }, { "epoch": 0.04783, "grad_norm": 1.0138916385667516, "learning_rate": 0.003, "loss": 4.1442, "step": 4783 }, { "epoch": 0.04784, "grad_norm": 1.0210591580782744, "learning_rate": 0.003, "loss": 4.1688, "step": 4784 }, { "epoch": 0.04785, "grad_norm": 1.0867592349380752, "learning_rate": 0.003, "loss": 4.1783, "step": 4785 }, { "epoch": 0.04786, "grad_norm": 0.9270426683995183, "learning_rate": 0.003, "loss": 4.1726, "step": 4786 }, { "epoch": 0.04787, "grad_norm": 0.9581278828591302, "learning_rate": 0.003, "loss": 4.1717, "step": 4787 }, { "epoch": 0.04788, "grad_norm": 1.0463519085350252, "learning_rate": 0.003, "loss": 4.1652, "step": 4788 }, { "epoch": 0.04789, "grad_norm": 1.048457678252623, "learning_rate": 0.003, "loss": 4.149, "step": 4789 }, { "epoch": 0.0479, "grad_norm": 1.0802030566717422, "learning_rate": 0.003, "loss": 4.1693, "step": 4790 }, { "epoch": 0.04791, "grad_norm": 0.9188967498168278, "learning_rate": 0.003, "loss": 4.1796, "step": 4791 }, { "epoch": 0.04792, "grad_norm": 1.0152593647469241, "learning_rate": 0.003, "loss": 4.1927, "step": 4792 }, { "epoch": 0.04793, "grad_norm": 1.018584486769021, "learning_rate": 0.003, "loss": 4.1531, "step": 4793 }, { "epoch": 0.04794, "grad_norm": 0.8402721654087, "learning_rate": 0.003, "loss": 4.1722, "step": 4794 }, { "epoch": 0.04795, "grad_norm": 0.7643727384922673, "learning_rate": 0.003, "loss": 4.1655, "step": 4795 }, { "epoch": 0.04796, "grad_norm": 0.813774811366339, "learning_rate": 0.003, "loss": 4.137, "step": 4796 }, { "epoch": 0.04797, "grad_norm": 0.6494127001881433, "learning_rate": 0.003, "loss": 4.1645, "step": 4797 }, { "epoch": 0.04798, "grad_norm": 0.664089910476502, "learning_rate": 0.003, "loss": 4.1242, "step": 4798 }, { "epoch": 0.04799, "grad_norm": 0.6031144588619443, "learning_rate": 0.003, "loss": 4.1338, "step": 4799 }, { "epoch": 0.048, "grad_norm": 0.5598439245281802, "learning_rate": 0.003, "loss": 4.1362, "step": 4800 }, { "epoch": 0.04801, "grad_norm": 0.6477000775289928, "learning_rate": 0.003, "loss": 4.1467, "step": 4801 }, { "epoch": 0.04802, "grad_norm": 0.706840356865147, "learning_rate": 0.003, "loss": 4.1209, "step": 4802 }, { "epoch": 0.04803, "grad_norm": 0.687443212408336, "learning_rate": 0.003, "loss": 4.1292, "step": 4803 }, { "epoch": 0.04804, "grad_norm": 0.6817235142027526, "learning_rate": 0.003, "loss": 4.1578, "step": 4804 }, { "epoch": 0.04805, "grad_norm": 0.7014528353258211, "learning_rate": 0.003, "loss": 4.1588, "step": 4805 }, { "epoch": 0.04806, "grad_norm": 0.6063087225759103, "learning_rate": 0.003, "loss": 4.1019, "step": 4806 }, { "epoch": 0.04807, "grad_norm": 0.4870524793552668, "learning_rate": 0.003, "loss": 4.1518, "step": 4807 }, { "epoch": 0.04808, "grad_norm": 0.4304737736314177, "learning_rate": 0.003, "loss": 4.1263, "step": 4808 }, { "epoch": 0.04809, "grad_norm": 0.444058010799397, "learning_rate": 0.003, "loss": 4.1187, "step": 4809 }, { "epoch": 0.0481, "grad_norm": 0.44159249278167406, "learning_rate": 0.003, "loss": 4.1384, "step": 4810 }, { "epoch": 0.04811, "grad_norm": 0.46942470854293217, "learning_rate": 0.003, "loss": 4.1346, "step": 4811 }, { "epoch": 0.04812, "grad_norm": 0.4477933921171643, "learning_rate": 0.003, "loss": 4.1339, "step": 4812 }, { "epoch": 0.04813, "grad_norm": 0.43354928602746334, "learning_rate": 0.003, "loss": 4.1197, "step": 4813 }, { "epoch": 0.04814, "grad_norm": 0.45498153005997544, "learning_rate": 0.003, "loss": 4.0766, "step": 4814 }, { "epoch": 0.04815, "grad_norm": 0.48463241046460065, "learning_rate": 0.003, "loss": 4.1186, "step": 4815 }, { "epoch": 0.04816, "grad_norm": 0.5311818047677171, "learning_rate": 0.003, "loss": 4.1276, "step": 4816 }, { "epoch": 0.04817, "grad_norm": 0.631506579388757, "learning_rate": 0.003, "loss": 4.1052, "step": 4817 }, { "epoch": 0.04818, "grad_norm": 0.8170443890052518, "learning_rate": 0.003, "loss": 4.123, "step": 4818 }, { "epoch": 0.04819, "grad_norm": 1.172132734592305, "learning_rate": 0.003, "loss": 4.1381, "step": 4819 }, { "epoch": 0.0482, "grad_norm": 0.838017513510012, "learning_rate": 0.003, "loss": 4.121, "step": 4820 }, { "epoch": 0.04821, "grad_norm": 0.6975770749182754, "learning_rate": 0.003, "loss": 4.1459, "step": 4821 }, { "epoch": 0.04822, "grad_norm": 0.7527718267712026, "learning_rate": 0.003, "loss": 4.126, "step": 4822 }, { "epoch": 0.04823, "grad_norm": 0.7414326559856069, "learning_rate": 0.003, "loss": 4.1219, "step": 4823 }, { "epoch": 0.04824, "grad_norm": 0.858775362232865, "learning_rate": 0.003, "loss": 4.1143, "step": 4824 }, { "epoch": 0.04825, "grad_norm": 0.695583690590602, "learning_rate": 0.003, "loss": 4.1259, "step": 4825 }, { "epoch": 0.04826, "grad_norm": 0.5781760330827292, "learning_rate": 0.003, "loss": 4.126, "step": 4826 }, { "epoch": 0.04827, "grad_norm": 0.5806936123154903, "learning_rate": 0.003, "loss": 4.1311, "step": 4827 }, { "epoch": 0.04828, "grad_norm": 0.7721046134763737, "learning_rate": 0.003, "loss": 4.1447, "step": 4828 }, { "epoch": 0.04829, "grad_norm": 0.7885233902013562, "learning_rate": 0.003, "loss": 4.1266, "step": 4829 }, { "epoch": 0.0483, "grad_norm": 0.7091409575187497, "learning_rate": 0.003, "loss": 4.1361, "step": 4830 }, { "epoch": 0.04831, "grad_norm": 0.7110846809520083, "learning_rate": 0.003, "loss": 4.1021, "step": 4831 }, { "epoch": 0.04832, "grad_norm": 0.680243507107158, "learning_rate": 0.003, "loss": 4.1282, "step": 4832 }, { "epoch": 0.04833, "grad_norm": 0.7284136950991514, "learning_rate": 0.003, "loss": 4.1327, "step": 4833 }, { "epoch": 0.04834, "grad_norm": 0.7137894246322048, "learning_rate": 0.003, "loss": 4.1037, "step": 4834 }, { "epoch": 0.04835, "grad_norm": 0.636140630835025, "learning_rate": 0.003, "loss": 4.1411, "step": 4835 }, { "epoch": 0.04836, "grad_norm": 0.75652748414691, "learning_rate": 0.003, "loss": 4.1105, "step": 4836 }, { "epoch": 0.04837, "grad_norm": 0.785548659376736, "learning_rate": 0.003, "loss": 4.1377, "step": 4837 }, { "epoch": 0.04838, "grad_norm": 0.8897930937393894, "learning_rate": 0.003, "loss": 4.1521, "step": 4838 }, { "epoch": 0.04839, "grad_norm": 0.9660346509912735, "learning_rate": 0.003, "loss": 4.1398, "step": 4839 }, { "epoch": 0.0484, "grad_norm": 0.9300911895678211, "learning_rate": 0.003, "loss": 4.1437, "step": 4840 }, { "epoch": 0.04841, "grad_norm": 0.7818946985346545, "learning_rate": 0.003, "loss": 4.1377, "step": 4841 }, { "epoch": 0.04842, "grad_norm": 0.7032209957018698, "learning_rate": 0.003, "loss": 4.1538, "step": 4842 }, { "epoch": 0.04843, "grad_norm": 0.6722771974917844, "learning_rate": 0.003, "loss": 4.1053, "step": 4843 }, { "epoch": 0.04844, "grad_norm": 0.6970990299983332, "learning_rate": 0.003, "loss": 4.1379, "step": 4844 }, { "epoch": 0.04845, "grad_norm": 0.6367868288526063, "learning_rate": 0.003, "loss": 4.1214, "step": 4845 }, { "epoch": 0.04846, "grad_norm": 0.6829062431633811, "learning_rate": 0.003, "loss": 4.146, "step": 4846 }, { "epoch": 0.04847, "grad_norm": 0.6460341296632274, "learning_rate": 0.003, "loss": 4.1086, "step": 4847 }, { "epoch": 0.04848, "grad_norm": 0.6713362647372938, "learning_rate": 0.003, "loss": 4.1515, "step": 4848 }, { "epoch": 0.04849, "grad_norm": 0.6714682036160667, "learning_rate": 0.003, "loss": 4.1426, "step": 4849 }, { "epoch": 0.0485, "grad_norm": 0.7057113852906085, "learning_rate": 0.003, "loss": 4.132, "step": 4850 }, { "epoch": 0.04851, "grad_norm": 0.6646770579142436, "learning_rate": 0.003, "loss": 4.1472, "step": 4851 }, { "epoch": 0.04852, "grad_norm": 0.6577931410879579, "learning_rate": 0.003, "loss": 4.0821, "step": 4852 }, { "epoch": 0.04853, "grad_norm": 0.6521737566217662, "learning_rate": 0.003, "loss": 4.1384, "step": 4853 }, { "epoch": 0.04854, "grad_norm": 0.7638656389712092, "learning_rate": 0.003, "loss": 4.1383, "step": 4854 }, { "epoch": 0.04855, "grad_norm": 0.9457310244447173, "learning_rate": 0.003, "loss": 4.1514, "step": 4855 }, { "epoch": 0.04856, "grad_norm": 1.1517585285172265, "learning_rate": 0.003, "loss": 4.1588, "step": 4856 }, { "epoch": 0.04857, "grad_norm": 0.9746651389714764, "learning_rate": 0.003, "loss": 4.1672, "step": 4857 }, { "epoch": 0.04858, "grad_norm": 0.8324446739497157, "learning_rate": 0.003, "loss": 4.135, "step": 4858 }, { "epoch": 0.04859, "grad_norm": 0.7506712881500571, "learning_rate": 0.003, "loss": 4.1678, "step": 4859 }, { "epoch": 0.0486, "grad_norm": 0.7957902928578074, "learning_rate": 0.003, "loss": 4.1483, "step": 4860 }, { "epoch": 0.04861, "grad_norm": 1.0534393033014007, "learning_rate": 0.003, "loss": 4.1484, "step": 4861 }, { "epoch": 0.04862, "grad_norm": 1.072043293213455, "learning_rate": 0.003, "loss": 4.1568, "step": 4862 }, { "epoch": 0.04863, "grad_norm": 0.8542078395141963, "learning_rate": 0.003, "loss": 4.1674, "step": 4863 }, { "epoch": 0.04864, "grad_norm": 0.979149461429483, "learning_rate": 0.003, "loss": 4.1489, "step": 4864 }, { "epoch": 0.04865, "grad_norm": 0.9889727360280101, "learning_rate": 0.003, "loss": 4.1728, "step": 4865 }, { "epoch": 0.04866, "grad_norm": 0.87668738837822, "learning_rate": 0.003, "loss": 4.176, "step": 4866 }, { "epoch": 0.04867, "grad_norm": 0.9480918411172308, "learning_rate": 0.003, "loss": 4.1508, "step": 4867 }, { "epoch": 0.04868, "grad_norm": 0.8900699953728738, "learning_rate": 0.003, "loss": 4.1571, "step": 4868 }, { "epoch": 0.04869, "grad_norm": 0.8410323236753658, "learning_rate": 0.003, "loss": 4.169, "step": 4869 }, { "epoch": 0.0487, "grad_norm": 0.8052365609623746, "learning_rate": 0.003, "loss": 4.1563, "step": 4870 }, { "epoch": 0.04871, "grad_norm": 0.732052294025559, "learning_rate": 0.003, "loss": 4.1727, "step": 4871 }, { "epoch": 0.04872, "grad_norm": 0.6093794224074204, "learning_rate": 0.003, "loss": 4.1332, "step": 4872 }, { "epoch": 0.04873, "grad_norm": 0.5550033492784485, "learning_rate": 0.003, "loss": 4.1554, "step": 4873 }, { "epoch": 0.04874, "grad_norm": 0.6407687802372767, "learning_rate": 0.003, "loss": 4.1462, "step": 4874 }, { "epoch": 0.04875, "grad_norm": 0.6445083276518792, "learning_rate": 0.003, "loss": 4.1398, "step": 4875 }, { "epoch": 0.04876, "grad_norm": 0.598683403431915, "learning_rate": 0.003, "loss": 4.1391, "step": 4876 }, { "epoch": 0.04877, "grad_norm": 0.5879420193521339, "learning_rate": 0.003, "loss": 4.1392, "step": 4877 }, { "epoch": 0.04878, "grad_norm": 0.6015687919635526, "learning_rate": 0.003, "loss": 4.1193, "step": 4878 }, { "epoch": 0.04879, "grad_norm": 0.6406016359581286, "learning_rate": 0.003, "loss": 4.1157, "step": 4879 }, { "epoch": 0.0488, "grad_norm": 0.6520274839816634, "learning_rate": 0.003, "loss": 4.1271, "step": 4880 }, { "epoch": 0.04881, "grad_norm": 0.6746577474599795, "learning_rate": 0.003, "loss": 4.1215, "step": 4881 }, { "epoch": 0.04882, "grad_norm": 0.6988276300361237, "learning_rate": 0.003, "loss": 4.1281, "step": 4882 }, { "epoch": 0.04883, "grad_norm": 0.637601167920116, "learning_rate": 0.003, "loss": 4.1203, "step": 4883 }, { "epoch": 0.04884, "grad_norm": 0.6575805356479, "learning_rate": 0.003, "loss": 4.1259, "step": 4884 }, { "epoch": 0.04885, "grad_norm": 0.8560411189615229, "learning_rate": 0.003, "loss": 4.1406, "step": 4885 }, { "epoch": 0.04886, "grad_norm": 1.131590855439064, "learning_rate": 0.003, "loss": 4.1439, "step": 4886 }, { "epoch": 0.04887, "grad_norm": 0.8261960657958597, "learning_rate": 0.003, "loss": 4.1505, "step": 4887 }, { "epoch": 0.04888, "grad_norm": 0.5889636338082994, "learning_rate": 0.003, "loss": 4.147, "step": 4888 }, { "epoch": 0.04889, "grad_norm": 0.6875568675189105, "learning_rate": 0.003, "loss": 4.1272, "step": 4889 }, { "epoch": 0.0489, "grad_norm": 0.8178506331950943, "learning_rate": 0.003, "loss": 4.1384, "step": 4890 }, { "epoch": 0.04891, "grad_norm": 0.8524321612267298, "learning_rate": 0.003, "loss": 4.1499, "step": 4891 }, { "epoch": 0.04892, "grad_norm": 0.6402952785912811, "learning_rate": 0.003, "loss": 4.1436, "step": 4892 }, { "epoch": 0.04893, "grad_norm": 0.49571623322484065, "learning_rate": 0.003, "loss": 4.1372, "step": 4893 }, { "epoch": 0.04894, "grad_norm": 0.6005303000219224, "learning_rate": 0.003, "loss": 4.1485, "step": 4894 }, { "epoch": 0.04895, "grad_norm": 0.7874188186491505, "learning_rate": 0.003, "loss": 4.1285, "step": 4895 }, { "epoch": 0.04896, "grad_norm": 0.8796160513302255, "learning_rate": 0.003, "loss": 4.1496, "step": 4896 }, { "epoch": 0.04897, "grad_norm": 0.7789626736982245, "learning_rate": 0.003, "loss": 4.1546, "step": 4897 }, { "epoch": 0.04898, "grad_norm": 0.7938347458338417, "learning_rate": 0.003, "loss": 4.1434, "step": 4898 }, { "epoch": 0.04899, "grad_norm": 0.6968501707866385, "learning_rate": 0.003, "loss": 4.1295, "step": 4899 }, { "epoch": 0.049, "grad_norm": 0.6737103261211691, "learning_rate": 0.003, "loss": 4.1563, "step": 4900 }, { "epoch": 0.04901, "grad_norm": 0.6928180762743227, "learning_rate": 0.003, "loss": 4.1181, "step": 4901 }, { "epoch": 0.04902, "grad_norm": 0.7387421279656656, "learning_rate": 0.003, "loss": 4.1554, "step": 4902 }, { "epoch": 0.04903, "grad_norm": 0.7365881444221591, "learning_rate": 0.003, "loss": 4.1552, "step": 4903 }, { "epoch": 0.04904, "grad_norm": 0.6809368406351045, "learning_rate": 0.003, "loss": 4.1193, "step": 4904 }, { "epoch": 0.04905, "grad_norm": 0.8596440024174532, "learning_rate": 0.003, "loss": 4.1227, "step": 4905 }, { "epoch": 0.04906, "grad_norm": 1.1779416676139556, "learning_rate": 0.003, "loss": 4.1426, "step": 4906 }, { "epoch": 0.04907, "grad_norm": 0.9153726514069693, "learning_rate": 0.003, "loss": 4.1244, "step": 4907 }, { "epoch": 0.04908, "grad_norm": 0.6705432105024506, "learning_rate": 0.003, "loss": 4.1326, "step": 4908 }, { "epoch": 0.04909, "grad_norm": 0.7177688849716874, "learning_rate": 0.003, "loss": 4.1377, "step": 4909 }, { "epoch": 0.0491, "grad_norm": 0.8659923553333108, "learning_rate": 0.003, "loss": 4.1347, "step": 4910 }, { "epoch": 0.04911, "grad_norm": 1.0871210771334645, "learning_rate": 0.003, "loss": 4.1392, "step": 4911 }, { "epoch": 0.04912, "grad_norm": 0.825170311776902, "learning_rate": 0.003, "loss": 4.1074, "step": 4912 }, { "epoch": 0.04913, "grad_norm": 0.6669180449834547, "learning_rate": 0.003, "loss": 4.1304, "step": 4913 }, { "epoch": 0.04914, "grad_norm": 0.6589662640213191, "learning_rate": 0.003, "loss": 4.1349, "step": 4914 }, { "epoch": 0.04915, "grad_norm": 0.6453034668867473, "learning_rate": 0.003, "loss": 4.1394, "step": 4915 }, { "epoch": 0.04916, "grad_norm": 0.6379061303258432, "learning_rate": 0.003, "loss": 4.1375, "step": 4916 }, { "epoch": 0.04917, "grad_norm": 0.5924781741927655, "learning_rate": 0.003, "loss": 4.0909, "step": 4917 }, { "epoch": 0.04918, "grad_norm": 0.5392017954625788, "learning_rate": 0.003, "loss": 4.1294, "step": 4918 }, { "epoch": 0.04919, "grad_norm": 0.4926263926107212, "learning_rate": 0.003, "loss": 4.1291, "step": 4919 }, { "epoch": 0.0492, "grad_norm": 0.5657039568533931, "learning_rate": 0.003, "loss": 4.1282, "step": 4920 }, { "epoch": 0.04921, "grad_norm": 0.6408113470012788, "learning_rate": 0.003, "loss": 4.1481, "step": 4921 }, { "epoch": 0.04922, "grad_norm": 0.7276187339344597, "learning_rate": 0.003, "loss": 4.1242, "step": 4922 }, { "epoch": 0.04923, "grad_norm": 0.7782906389614147, "learning_rate": 0.003, "loss": 4.1051, "step": 4923 }, { "epoch": 0.04924, "grad_norm": 0.8311620891758528, "learning_rate": 0.003, "loss": 4.1585, "step": 4924 }, { "epoch": 0.04925, "grad_norm": 0.9301177021878505, "learning_rate": 0.003, "loss": 4.1125, "step": 4925 }, { "epoch": 0.04926, "grad_norm": 0.8645596717575721, "learning_rate": 0.003, "loss": 4.115, "step": 4926 }, { "epoch": 0.04927, "grad_norm": 0.7213043835497918, "learning_rate": 0.003, "loss": 4.1488, "step": 4927 }, { "epoch": 0.04928, "grad_norm": 0.6797037072765041, "learning_rate": 0.003, "loss": 4.1102, "step": 4928 }, { "epoch": 0.04929, "grad_norm": 0.5833040699084097, "learning_rate": 0.003, "loss": 4.1237, "step": 4929 }, { "epoch": 0.0493, "grad_norm": 0.7502027197799604, "learning_rate": 0.003, "loss": 4.1211, "step": 4930 }, { "epoch": 0.04931, "grad_norm": 0.7729294183641608, "learning_rate": 0.003, "loss": 4.1088, "step": 4931 }, { "epoch": 0.04932, "grad_norm": 0.7115365383624536, "learning_rate": 0.003, "loss": 4.1526, "step": 4932 }, { "epoch": 0.04933, "grad_norm": 0.619935413571119, "learning_rate": 0.003, "loss": 4.127, "step": 4933 }, { "epoch": 0.04934, "grad_norm": 0.6328194752766774, "learning_rate": 0.003, "loss": 4.156, "step": 4934 }, { "epoch": 0.04935, "grad_norm": 0.666067060827277, "learning_rate": 0.003, "loss": 4.153, "step": 4935 }, { "epoch": 0.04936, "grad_norm": 0.7455790570370421, "learning_rate": 0.003, "loss": 4.1274, "step": 4936 }, { "epoch": 0.04937, "grad_norm": 0.8690467307626608, "learning_rate": 0.003, "loss": 4.1265, "step": 4937 }, { "epoch": 0.04938, "grad_norm": 0.9382478740758116, "learning_rate": 0.003, "loss": 4.1384, "step": 4938 }, { "epoch": 0.04939, "grad_norm": 0.9269163238429858, "learning_rate": 0.003, "loss": 4.15, "step": 4939 }, { "epoch": 0.0494, "grad_norm": 0.7535488235404929, "learning_rate": 0.003, "loss": 4.1512, "step": 4940 }, { "epoch": 0.04941, "grad_norm": 0.6231540125860376, "learning_rate": 0.003, "loss": 4.1145, "step": 4941 }, { "epoch": 0.04942, "grad_norm": 0.5994421619470709, "learning_rate": 0.003, "loss": 4.1284, "step": 4942 }, { "epoch": 0.04943, "grad_norm": 0.6316381391994007, "learning_rate": 0.003, "loss": 4.1507, "step": 4943 }, { "epoch": 0.04944, "grad_norm": 0.6942848124244505, "learning_rate": 0.003, "loss": 4.1526, "step": 4944 }, { "epoch": 0.04945, "grad_norm": 0.6745431924540877, "learning_rate": 0.003, "loss": 4.1442, "step": 4945 }, { "epoch": 0.04946, "grad_norm": 0.7148234111999289, "learning_rate": 0.003, "loss": 4.1734, "step": 4946 }, { "epoch": 0.04947, "grad_norm": 0.8151644855471658, "learning_rate": 0.003, "loss": 4.1207, "step": 4947 }, { "epoch": 0.04948, "grad_norm": 0.901565456553654, "learning_rate": 0.003, "loss": 4.1404, "step": 4948 }, { "epoch": 0.04949, "grad_norm": 0.8388445391988596, "learning_rate": 0.003, "loss": 4.1455, "step": 4949 }, { "epoch": 0.0495, "grad_norm": 0.769498516722048, "learning_rate": 0.003, "loss": 4.1069, "step": 4950 }, { "epoch": 0.04951, "grad_norm": 0.9271659871328424, "learning_rate": 0.003, "loss": 4.1193, "step": 4951 }, { "epoch": 0.04952, "grad_norm": 0.9430760383533365, "learning_rate": 0.003, "loss": 4.1601, "step": 4952 }, { "epoch": 0.04953, "grad_norm": 0.8839519050512392, "learning_rate": 0.003, "loss": 4.1458, "step": 4953 }, { "epoch": 0.04954, "grad_norm": 0.8691782058986859, "learning_rate": 0.003, "loss": 4.1668, "step": 4954 }, { "epoch": 0.04955, "grad_norm": 0.7804007935901659, "learning_rate": 0.003, "loss": 4.1378, "step": 4955 }, { "epoch": 0.04956, "grad_norm": 0.7340937957959927, "learning_rate": 0.003, "loss": 4.1545, "step": 4956 }, { "epoch": 0.04957, "grad_norm": 0.7502559806635023, "learning_rate": 0.003, "loss": 4.123, "step": 4957 }, { "epoch": 0.04958, "grad_norm": 0.7469357331628064, "learning_rate": 0.003, "loss": 4.1412, "step": 4958 }, { "epoch": 0.04959, "grad_norm": 0.7227020566857775, "learning_rate": 0.003, "loss": 4.1451, "step": 4959 }, { "epoch": 0.0496, "grad_norm": 0.6072369078325202, "learning_rate": 0.003, "loss": 4.1095, "step": 4960 }, { "epoch": 0.04961, "grad_norm": 0.676652458930749, "learning_rate": 0.003, "loss": 4.1185, "step": 4961 }, { "epoch": 0.04962, "grad_norm": 0.7166853924943506, "learning_rate": 0.003, "loss": 4.1329, "step": 4962 }, { "epoch": 0.04963, "grad_norm": 0.8282425384023169, "learning_rate": 0.003, "loss": 4.1738, "step": 4963 }, { "epoch": 0.04964, "grad_norm": 0.9085445932185267, "learning_rate": 0.003, "loss": 4.1454, "step": 4964 }, { "epoch": 0.04965, "grad_norm": 0.9373085689145864, "learning_rate": 0.003, "loss": 4.149, "step": 4965 }, { "epoch": 0.04966, "grad_norm": 0.6879723306931469, "learning_rate": 0.003, "loss": 4.114, "step": 4966 }, { "epoch": 0.04967, "grad_norm": 0.5346987676889187, "learning_rate": 0.003, "loss": 4.1357, "step": 4967 }, { "epoch": 0.04968, "grad_norm": 0.5973814985358372, "learning_rate": 0.003, "loss": 4.1594, "step": 4968 }, { "epoch": 0.04969, "grad_norm": 0.8137665544420138, "learning_rate": 0.003, "loss": 4.1371, "step": 4969 }, { "epoch": 0.0497, "grad_norm": 0.9213595657452577, "learning_rate": 0.003, "loss": 4.1364, "step": 4970 }, { "epoch": 0.04971, "grad_norm": 0.8081884217188025, "learning_rate": 0.003, "loss": 4.1548, "step": 4971 }, { "epoch": 0.04972, "grad_norm": 0.696216159144708, "learning_rate": 0.003, "loss": 4.127, "step": 4972 }, { "epoch": 0.04973, "grad_norm": 0.8733113357334519, "learning_rate": 0.003, "loss": 4.1797, "step": 4973 }, { "epoch": 0.04974, "grad_norm": 1.0046232999118463, "learning_rate": 0.003, "loss": 4.1469, "step": 4974 }, { "epoch": 0.04975, "grad_norm": 0.7903522031468557, "learning_rate": 0.003, "loss": 4.1385, "step": 4975 }, { "epoch": 0.04976, "grad_norm": 0.6802567984502358, "learning_rate": 0.003, "loss": 4.1337, "step": 4976 }, { "epoch": 0.04977, "grad_norm": 0.6880693634478566, "learning_rate": 0.003, "loss": 4.145, "step": 4977 }, { "epoch": 0.04978, "grad_norm": 0.8224558060329078, "learning_rate": 0.003, "loss": 4.1443, "step": 4978 }, { "epoch": 0.04979, "grad_norm": 0.693860233816605, "learning_rate": 0.003, "loss": 4.1505, "step": 4979 }, { "epoch": 0.0498, "grad_norm": 0.5722513075301342, "learning_rate": 0.003, "loss": 4.1367, "step": 4980 }, { "epoch": 0.04981, "grad_norm": 0.5155541843719716, "learning_rate": 0.003, "loss": 4.1092, "step": 4981 }, { "epoch": 0.04982, "grad_norm": 0.5293279825762658, "learning_rate": 0.003, "loss": 4.1319, "step": 4982 }, { "epoch": 0.04983, "grad_norm": 0.5031810438056484, "learning_rate": 0.003, "loss": 4.0871, "step": 4983 }, { "epoch": 0.04984, "grad_norm": 0.50373719850947, "learning_rate": 0.003, "loss": 4.0951, "step": 4984 }, { "epoch": 0.04985, "grad_norm": 0.4503251156136766, "learning_rate": 0.003, "loss": 4.1217, "step": 4985 }, { "epoch": 0.04986, "grad_norm": 0.45313125147575284, "learning_rate": 0.003, "loss": 4.1387, "step": 4986 }, { "epoch": 0.04987, "grad_norm": 0.3893398619956913, "learning_rate": 0.003, "loss": 4.1002, "step": 4987 }, { "epoch": 0.04988, "grad_norm": 0.38877981714111726, "learning_rate": 0.003, "loss": 4.133, "step": 4988 }, { "epoch": 0.04989, "grad_norm": 0.4389205190435005, "learning_rate": 0.003, "loss": 4.1368, "step": 4989 }, { "epoch": 0.0499, "grad_norm": 0.629712543859391, "learning_rate": 0.003, "loss": 4.1162, "step": 4990 }, { "epoch": 0.04991, "grad_norm": 0.8411643038205673, "learning_rate": 0.003, "loss": 4.1109, "step": 4991 }, { "epoch": 0.04992, "grad_norm": 1.0789241755274537, "learning_rate": 0.003, "loss": 4.1066, "step": 4992 }, { "epoch": 0.04993, "grad_norm": 0.894691990453197, "learning_rate": 0.003, "loss": 4.1324, "step": 4993 }, { "epoch": 0.04994, "grad_norm": 0.768409182457612, "learning_rate": 0.003, "loss": 4.1156, "step": 4994 }, { "epoch": 0.04995, "grad_norm": 0.7502833338777625, "learning_rate": 0.003, "loss": 4.1388, "step": 4995 }, { "epoch": 0.04996, "grad_norm": 0.7438036098652065, "learning_rate": 0.003, "loss": 4.1195, "step": 4996 }, { "epoch": 0.04997, "grad_norm": 0.7542393015064909, "learning_rate": 0.003, "loss": 4.137, "step": 4997 }, { "epoch": 0.04998, "grad_norm": 0.7987106834768938, "learning_rate": 0.003, "loss": 4.1614, "step": 4998 }, { "epoch": 0.04999, "grad_norm": 0.7978624560845015, "learning_rate": 0.003, "loss": 4.1203, "step": 4999 }, { "epoch": 0.05, "grad_norm": 0.9161273312112043, "learning_rate": 0.003, "loss": 4.1693, "step": 5000 }, { "epoch": 0.05001, "grad_norm": 0.996412080112922, "learning_rate": 0.003, "loss": 4.1666, "step": 5001 }, { "epoch": 0.05002, "grad_norm": 0.9597914662378922, "learning_rate": 0.003, "loss": 4.1413, "step": 5002 }, { "epoch": 0.05003, "grad_norm": 0.8551476884140587, "learning_rate": 0.003, "loss": 4.1549, "step": 5003 }, { "epoch": 0.05004, "grad_norm": 0.8613602446247762, "learning_rate": 0.003, "loss": 4.1388, "step": 5004 }, { "epoch": 0.05005, "grad_norm": 0.9135522439159383, "learning_rate": 0.003, "loss": 4.1599, "step": 5005 }, { "epoch": 0.05006, "grad_norm": 1.0256214526238998, "learning_rate": 0.003, "loss": 4.1448, "step": 5006 }, { "epoch": 0.05007, "grad_norm": 0.9189841175452944, "learning_rate": 0.003, "loss": 4.1565, "step": 5007 }, { "epoch": 0.05008, "grad_norm": 0.8069809344089516, "learning_rate": 0.003, "loss": 4.1222, "step": 5008 }, { "epoch": 0.05009, "grad_norm": 0.7363173254372745, "learning_rate": 0.003, "loss": 4.1321, "step": 5009 }, { "epoch": 0.0501, "grad_norm": 0.780930048247047, "learning_rate": 0.003, "loss": 4.1444, "step": 5010 }, { "epoch": 0.05011, "grad_norm": 0.8533571714235313, "learning_rate": 0.003, "loss": 4.151, "step": 5011 }, { "epoch": 0.05012, "grad_norm": 0.7333946370690233, "learning_rate": 0.003, "loss": 4.1338, "step": 5012 }, { "epoch": 0.05013, "grad_norm": 0.7554255781670896, "learning_rate": 0.003, "loss": 4.1335, "step": 5013 }, { "epoch": 0.05014, "grad_norm": 0.8053934909287488, "learning_rate": 0.003, "loss": 4.1128, "step": 5014 }, { "epoch": 0.05015, "grad_norm": 0.6989364159311318, "learning_rate": 0.003, "loss": 4.1504, "step": 5015 }, { "epoch": 0.05016, "grad_norm": 0.6523590967522017, "learning_rate": 0.003, "loss": 4.1287, "step": 5016 }, { "epoch": 0.05017, "grad_norm": 0.6801442129276165, "learning_rate": 0.003, "loss": 4.1461, "step": 5017 }, { "epoch": 0.05018, "grad_norm": 0.7921166720657037, "learning_rate": 0.003, "loss": 4.1423, "step": 5018 }, { "epoch": 0.05019, "grad_norm": 0.9072386747671375, "learning_rate": 0.003, "loss": 4.1649, "step": 5019 }, { "epoch": 0.0502, "grad_norm": 1.01701838314669, "learning_rate": 0.003, "loss": 4.1346, "step": 5020 }, { "epoch": 0.05021, "grad_norm": 0.9399609124786235, "learning_rate": 0.003, "loss": 4.1404, "step": 5021 }, { "epoch": 0.05022, "grad_norm": 0.9060890151344648, "learning_rate": 0.003, "loss": 4.1401, "step": 5022 }, { "epoch": 0.05023, "grad_norm": 0.833401199404608, "learning_rate": 0.003, "loss": 4.1634, "step": 5023 }, { "epoch": 0.05024, "grad_norm": 0.6936980469006718, "learning_rate": 0.003, "loss": 4.1185, "step": 5024 }, { "epoch": 0.05025, "grad_norm": 0.6485227724397681, "learning_rate": 0.003, "loss": 4.1543, "step": 5025 }, { "epoch": 0.05026, "grad_norm": 0.6504679913105271, "learning_rate": 0.003, "loss": 4.1308, "step": 5026 }, { "epoch": 0.05027, "grad_norm": 0.6627173144511802, "learning_rate": 0.003, "loss": 4.1474, "step": 5027 }, { "epoch": 0.05028, "grad_norm": 0.6951447785935732, "learning_rate": 0.003, "loss": 4.1337, "step": 5028 }, { "epoch": 0.05029, "grad_norm": 0.6952175839048826, "learning_rate": 0.003, "loss": 4.1472, "step": 5029 }, { "epoch": 0.0503, "grad_norm": 0.5688496607028097, "learning_rate": 0.003, "loss": 4.112, "step": 5030 }, { "epoch": 0.05031, "grad_norm": 0.6035161846572389, "learning_rate": 0.003, "loss": 4.1318, "step": 5031 }, { "epoch": 0.05032, "grad_norm": 0.670286035574246, "learning_rate": 0.003, "loss": 4.135, "step": 5032 }, { "epoch": 0.05033, "grad_norm": 0.6467306224656293, "learning_rate": 0.003, "loss": 4.1099, "step": 5033 }, { "epoch": 0.05034, "grad_norm": 0.6063260055449672, "learning_rate": 0.003, "loss": 4.1125, "step": 5034 }, { "epoch": 0.05035, "grad_norm": 0.629592938048323, "learning_rate": 0.003, "loss": 4.1323, "step": 5035 }, { "epoch": 0.05036, "grad_norm": 0.6478567980439086, "learning_rate": 0.003, "loss": 4.1177, "step": 5036 }, { "epoch": 0.05037, "grad_norm": 0.7505964391613944, "learning_rate": 0.003, "loss": 4.1165, "step": 5037 }, { "epoch": 0.05038, "grad_norm": 0.8528209687755143, "learning_rate": 0.003, "loss": 4.1296, "step": 5038 }, { "epoch": 0.05039, "grad_norm": 1.0629743487440606, "learning_rate": 0.003, "loss": 4.1472, "step": 5039 }, { "epoch": 0.0504, "grad_norm": 1.1351742762146553, "learning_rate": 0.003, "loss": 4.1529, "step": 5040 }, { "epoch": 0.05041, "grad_norm": 0.7653122098370709, "learning_rate": 0.003, "loss": 4.1447, "step": 5041 }, { "epoch": 0.05042, "grad_norm": 0.7775733262103008, "learning_rate": 0.003, "loss": 4.1377, "step": 5042 }, { "epoch": 0.05043, "grad_norm": 0.7251140829300723, "learning_rate": 0.003, "loss": 4.1332, "step": 5043 }, { "epoch": 0.05044, "grad_norm": 0.7675590430218364, "learning_rate": 0.003, "loss": 4.1501, "step": 5044 }, { "epoch": 0.05045, "grad_norm": 0.796129916502261, "learning_rate": 0.003, "loss": 4.1675, "step": 5045 }, { "epoch": 0.05046, "grad_norm": 0.7730541689349488, "learning_rate": 0.003, "loss": 4.1316, "step": 5046 }, { "epoch": 0.05047, "grad_norm": 0.721813094114717, "learning_rate": 0.003, "loss": 4.1401, "step": 5047 }, { "epoch": 0.05048, "grad_norm": 0.6421472913380964, "learning_rate": 0.003, "loss": 4.1331, "step": 5048 }, { "epoch": 0.05049, "grad_norm": 0.6622380588723976, "learning_rate": 0.003, "loss": 4.0972, "step": 5049 }, { "epoch": 0.0505, "grad_norm": 0.5608629241012306, "learning_rate": 0.003, "loss": 4.183, "step": 5050 }, { "epoch": 0.05051, "grad_norm": 0.5745585056656611, "learning_rate": 0.003, "loss": 4.1443, "step": 5051 }, { "epoch": 0.05052, "grad_norm": 0.6379797741525732, "learning_rate": 0.003, "loss": 4.1186, "step": 5052 }, { "epoch": 0.05053, "grad_norm": 0.6833197049521939, "learning_rate": 0.003, "loss": 4.0926, "step": 5053 }, { "epoch": 0.05054, "grad_norm": 0.7634490766042622, "learning_rate": 0.003, "loss": 4.1497, "step": 5054 }, { "epoch": 0.05055, "grad_norm": 0.8837284078408866, "learning_rate": 0.003, "loss": 4.1298, "step": 5055 }, { "epoch": 0.05056, "grad_norm": 0.986372668426662, "learning_rate": 0.003, "loss": 4.1493, "step": 5056 }, { "epoch": 0.05057, "grad_norm": 1.1002958520848571, "learning_rate": 0.003, "loss": 4.1615, "step": 5057 }, { "epoch": 0.05058, "grad_norm": 0.8620407065983835, "learning_rate": 0.003, "loss": 4.1539, "step": 5058 }, { "epoch": 0.05059, "grad_norm": 0.7776023795769754, "learning_rate": 0.003, "loss": 4.12, "step": 5059 }, { "epoch": 0.0506, "grad_norm": 0.8000191647790404, "learning_rate": 0.003, "loss": 4.155, "step": 5060 }, { "epoch": 0.05061, "grad_norm": 0.8859446865423543, "learning_rate": 0.003, "loss": 4.135, "step": 5061 }, { "epoch": 0.05062, "grad_norm": 0.8482279418144665, "learning_rate": 0.003, "loss": 4.1209, "step": 5062 }, { "epoch": 0.05063, "grad_norm": 0.8922032578947977, "learning_rate": 0.003, "loss": 4.1079, "step": 5063 }, { "epoch": 0.05064, "grad_norm": 0.8411761082071082, "learning_rate": 0.003, "loss": 4.1479, "step": 5064 }, { "epoch": 0.05065, "grad_norm": 0.8548177713084265, "learning_rate": 0.003, "loss": 4.136, "step": 5065 }, { "epoch": 0.05066, "grad_norm": 0.8283313824637577, "learning_rate": 0.003, "loss": 4.1234, "step": 5066 }, { "epoch": 0.05067, "grad_norm": 0.8012249471762791, "learning_rate": 0.003, "loss": 4.1327, "step": 5067 }, { "epoch": 0.05068, "grad_norm": 0.7695096288578107, "learning_rate": 0.003, "loss": 4.1533, "step": 5068 }, { "epoch": 0.05069, "grad_norm": 0.9045793357271396, "learning_rate": 0.003, "loss": 4.1473, "step": 5069 }, { "epoch": 0.0507, "grad_norm": 0.864648963362809, "learning_rate": 0.003, "loss": 4.1598, "step": 5070 }, { "epoch": 0.05071, "grad_norm": 0.8168223538297239, "learning_rate": 0.003, "loss": 4.1468, "step": 5071 }, { "epoch": 0.05072, "grad_norm": 0.7496813210802163, "learning_rate": 0.003, "loss": 4.1213, "step": 5072 }, { "epoch": 0.05073, "grad_norm": 0.758480840663007, "learning_rate": 0.003, "loss": 4.123, "step": 5073 }, { "epoch": 0.05074, "grad_norm": 0.696634586083982, "learning_rate": 0.003, "loss": 4.1158, "step": 5074 }, { "epoch": 0.05075, "grad_norm": 0.7650541290537565, "learning_rate": 0.003, "loss": 4.1406, "step": 5075 }, { "epoch": 0.05076, "grad_norm": 0.8193433029203343, "learning_rate": 0.003, "loss": 4.1544, "step": 5076 }, { "epoch": 0.05077, "grad_norm": 0.8494467870191629, "learning_rate": 0.003, "loss": 4.1587, "step": 5077 }, { "epoch": 0.05078, "grad_norm": 0.746324730026238, "learning_rate": 0.003, "loss": 4.1508, "step": 5078 }, { "epoch": 0.05079, "grad_norm": 0.7788241307468048, "learning_rate": 0.003, "loss": 4.1233, "step": 5079 }, { "epoch": 0.0508, "grad_norm": 0.6225546418109031, "learning_rate": 0.003, "loss": 4.1123, "step": 5080 }, { "epoch": 0.05081, "grad_norm": 0.5890146987434735, "learning_rate": 0.003, "loss": 4.1354, "step": 5081 }, { "epoch": 0.05082, "grad_norm": 0.7095924514939523, "learning_rate": 0.003, "loss": 4.145, "step": 5082 }, { "epoch": 0.05083, "grad_norm": 0.8654893728046584, "learning_rate": 0.003, "loss": 4.1128, "step": 5083 }, { "epoch": 0.05084, "grad_norm": 0.9274741044534023, "learning_rate": 0.003, "loss": 4.1349, "step": 5084 }, { "epoch": 0.05085, "grad_norm": 0.863892065363985, "learning_rate": 0.003, "loss": 4.1004, "step": 5085 }, { "epoch": 0.05086, "grad_norm": 0.7754486240701118, "learning_rate": 0.003, "loss": 4.1074, "step": 5086 }, { "epoch": 0.05087, "grad_norm": 0.6467115070840203, "learning_rate": 0.003, "loss": 4.1273, "step": 5087 }, { "epoch": 0.05088, "grad_norm": 0.6210040639615062, "learning_rate": 0.003, "loss": 4.1254, "step": 5088 }, { "epoch": 0.05089, "grad_norm": 0.6024673934619533, "learning_rate": 0.003, "loss": 4.1198, "step": 5089 }, { "epoch": 0.0509, "grad_norm": 0.6180444267761035, "learning_rate": 0.003, "loss": 4.156, "step": 5090 }, { "epoch": 0.05091, "grad_norm": 0.6761606632023955, "learning_rate": 0.003, "loss": 4.1193, "step": 5091 }, { "epoch": 0.05092, "grad_norm": 0.7124865882076785, "learning_rate": 0.003, "loss": 4.1217, "step": 5092 }, { "epoch": 0.05093, "grad_norm": 0.6974866206345641, "learning_rate": 0.003, "loss": 4.1352, "step": 5093 }, { "epoch": 0.05094, "grad_norm": 0.6044579073369937, "learning_rate": 0.003, "loss": 4.0943, "step": 5094 }, { "epoch": 0.05095, "grad_norm": 0.6081394607788756, "learning_rate": 0.003, "loss": 4.1312, "step": 5095 }, { "epoch": 0.05096, "grad_norm": 0.529589673235904, "learning_rate": 0.003, "loss": 4.1165, "step": 5096 }, { "epoch": 0.05097, "grad_norm": 0.5564475574602485, "learning_rate": 0.003, "loss": 4.1129, "step": 5097 }, { "epoch": 0.05098, "grad_norm": 0.6408190676261977, "learning_rate": 0.003, "loss": 4.1069, "step": 5098 }, { "epoch": 0.05099, "grad_norm": 0.6723429647324713, "learning_rate": 0.003, "loss": 4.0863, "step": 5099 }, { "epoch": 0.051, "grad_norm": 0.8300243600705887, "learning_rate": 0.003, "loss": 4.1109, "step": 5100 }, { "epoch": 0.05101, "grad_norm": 1.1968215120059336, "learning_rate": 0.003, "loss": 4.1589, "step": 5101 }, { "epoch": 0.05102, "grad_norm": 0.9163396896342757, "learning_rate": 0.003, "loss": 4.1289, "step": 5102 }, { "epoch": 0.05103, "grad_norm": 0.6393734271585462, "learning_rate": 0.003, "loss": 4.1356, "step": 5103 }, { "epoch": 0.05104, "grad_norm": 0.6360132483972007, "learning_rate": 0.003, "loss": 4.1164, "step": 5104 }, { "epoch": 0.05105, "grad_norm": 0.7716876792159864, "learning_rate": 0.003, "loss": 4.1071, "step": 5105 }, { "epoch": 0.05106, "grad_norm": 0.8361810564307343, "learning_rate": 0.003, "loss": 4.088, "step": 5106 }, { "epoch": 0.05107, "grad_norm": 0.8838910237762364, "learning_rate": 0.003, "loss": 4.1191, "step": 5107 }, { "epoch": 0.05108, "grad_norm": 0.7935879173083066, "learning_rate": 0.003, "loss": 4.1669, "step": 5108 }, { "epoch": 0.05109, "grad_norm": 0.8192023142285442, "learning_rate": 0.003, "loss": 4.1093, "step": 5109 }, { "epoch": 0.0511, "grad_norm": 0.843755803433391, "learning_rate": 0.003, "loss": 4.1448, "step": 5110 }, { "epoch": 0.05111, "grad_norm": 0.7470385931001929, "learning_rate": 0.003, "loss": 4.155, "step": 5111 }, { "epoch": 0.05112, "grad_norm": 0.6896006523029916, "learning_rate": 0.003, "loss": 4.1399, "step": 5112 }, { "epoch": 0.05113, "grad_norm": 0.7291351113373098, "learning_rate": 0.003, "loss": 4.1297, "step": 5113 }, { "epoch": 0.05114, "grad_norm": 0.6627016065501992, "learning_rate": 0.003, "loss": 4.1057, "step": 5114 }, { "epoch": 0.05115, "grad_norm": 0.6079619066030912, "learning_rate": 0.003, "loss": 4.1316, "step": 5115 }, { "epoch": 0.05116, "grad_norm": 0.6260970910935404, "learning_rate": 0.003, "loss": 4.1393, "step": 5116 }, { "epoch": 0.05117, "grad_norm": 0.6494564538670361, "learning_rate": 0.003, "loss": 4.1193, "step": 5117 }, { "epoch": 0.05118, "grad_norm": 0.6314756962030549, "learning_rate": 0.003, "loss": 4.119, "step": 5118 }, { "epoch": 0.05119, "grad_norm": 0.621412047113099, "learning_rate": 0.003, "loss": 4.101, "step": 5119 }, { "epoch": 0.0512, "grad_norm": 0.6805129862001599, "learning_rate": 0.003, "loss": 4.0929, "step": 5120 }, { "epoch": 0.05121, "grad_norm": 0.6678436168391524, "learning_rate": 0.003, "loss": 4.1117, "step": 5121 }, { "epoch": 0.05122, "grad_norm": 0.6883310547366032, "learning_rate": 0.003, "loss": 4.1009, "step": 5122 }, { "epoch": 0.05123, "grad_norm": 0.6801681468514705, "learning_rate": 0.003, "loss": 4.1157, "step": 5123 }, { "epoch": 0.05124, "grad_norm": 0.6407786390586026, "learning_rate": 0.003, "loss": 4.154, "step": 5124 }, { "epoch": 0.05125, "grad_norm": 0.616883516629288, "learning_rate": 0.003, "loss": 4.122, "step": 5125 }, { "epoch": 0.05126, "grad_norm": 0.7827204877464654, "learning_rate": 0.003, "loss": 4.1224, "step": 5126 }, { "epoch": 0.05127, "grad_norm": 0.8837164038749966, "learning_rate": 0.003, "loss": 4.1311, "step": 5127 }, { "epoch": 0.05128, "grad_norm": 0.8294720258543833, "learning_rate": 0.003, "loss": 4.1466, "step": 5128 }, { "epoch": 0.05129, "grad_norm": 0.8659985985101629, "learning_rate": 0.003, "loss": 4.1179, "step": 5129 }, { "epoch": 0.0513, "grad_norm": 0.9120789657636011, "learning_rate": 0.003, "loss": 4.111, "step": 5130 }, { "epoch": 0.05131, "grad_norm": 1.071986933837498, "learning_rate": 0.003, "loss": 4.1629, "step": 5131 }, { "epoch": 0.05132, "grad_norm": 1.0458562389037795, "learning_rate": 0.003, "loss": 4.1506, "step": 5132 }, { "epoch": 0.05133, "grad_norm": 0.9786133915152717, "learning_rate": 0.003, "loss": 4.1558, "step": 5133 }, { "epoch": 0.05134, "grad_norm": 0.9697734611114633, "learning_rate": 0.003, "loss": 4.1714, "step": 5134 }, { "epoch": 0.05135, "grad_norm": 0.9363895927818596, "learning_rate": 0.003, "loss": 4.1397, "step": 5135 }, { "epoch": 0.05136, "grad_norm": 1.0170683776594798, "learning_rate": 0.003, "loss": 4.125, "step": 5136 }, { "epoch": 0.05137, "grad_norm": 1.1145412465734708, "learning_rate": 0.003, "loss": 4.1715, "step": 5137 }, { "epoch": 0.05138, "grad_norm": 0.9941607682684069, "learning_rate": 0.003, "loss": 4.1663, "step": 5138 }, { "epoch": 0.05139, "grad_norm": 0.978207963075468, "learning_rate": 0.003, "loss": 4.1558, "step": 5139 }, { "epoch": 0.0514, "grad_norm": 1.0158981617955298, "learning_rate": 0.003, "loss": 4.1585, "step": 5140 }, { "epoch": 0.05141, "grad_norm": 0.8431378615881333, "learning_rate": 0.003, "loss": 4.2033, "step": 5141 }, { "epoch": 0.05142, "grad_norm": 0.9258386606688979, "learning_rate": 0.003, "loss": 4.1465, "step": 5142 }, { "epoch": 0.05143, "grad_norm": 0.8809664967212562, "learning_rate": 0.003, "loss": 4.1529, "step": 5143 }, { "epoch": 0.05144, "grad_norm": 0.808685397208462, "learning_rate": 0.003, "loss": 4.136, "step": 5144 }, { "epoch": 0.05145, "grad_norm": 0.7632108507977365, "learning_rate": 0.003, "loss": 4.1352, "step": 5145 }, { "epoch": 0.05146, "grad_norm": 0.6541780509288703, "learning_rate": 0.003, "loss": 4.1198, "step": 5146 }, { "epoch": 0.05147, "grad_norm": 0.5225402009574097, "learning_rate": 0.003, "loss": 4.1424, "step": 5147 }, { "epoch": 0.05148, "grad_norm": 0.4857663422658965, "learning_rate": 0.003, "loss": 4.1408, "step": 5148 }, { "epoch": 0.05149, "grad_norm": 0.5849142561064581, "learning_rate": 0.003, "loss": 4.1136, "step": 5149 }, { "epoch": 0.0515, "grad_norm": 0.6123859001036767, "learning_rate": 0.003, "loss": 4.151, "step": 5150 }, { "epoch": 0.05151, "grad_norm": 0.7026699600678006, "learning_rate": 0.003, "loss": 4.1142, "step": 5151 }, { "epoch": 0.05152, "grad_norm": 0.6504209042889696, "learning_rate": 0.003, "loss": 4.1508, "step": 5152 }, { "epoch": 0.05153, "grad_norm": 0.6252489466134844, "learning_rate": 0.003, "loss": 4.124, "step": 5153 }, { "epoch": 0.05154, "grad_norm": 0.6371096455455558, "learning_rate": 0.003, "loss": 4.1347, "step": 5154 }, { "epoch": 0.05155, "grad_norm": 0.5613653424439898, "learning_rate": 0.003, "loss": 4.1327, "step": 5155 }, { "epoch": 0.05156, "grad_norm": 0.5387099035222069, "learning_rate": 0.003, "loss": 4.1562, "step": 5156 }, { "epoch": 0.05157, "grad_norm": 0.5796148701609515, "learning_rate": 0.003, "loss": 4.1277, "step": 5157 }, { "epoch": 0.05158, "grad_norm": 0.6280103262144628, "learning_rate": 0.003, "loss": 4.1224, "step": 5158 }, { "epoch": 0.05159, "grad_norm": 0.713561476333827, "learning_rate": 0.003, "loss": 4.1053, "step": 5159 }, { "epoch": 0.0516, "grad_norm": 0.7440717469043273, "learning_rate": 0.003, "loss": 4.1151, "step": 5160 }, { "epoch": 0.05161, "grad_norm": 0.693854318455779, "learning_rate": 0.003, "loss": 4.0878, "step": 5161 }, { "epoch": 0.05162, "grad_norm": 0.5713907895082873, "learning_rate": 0.003, "loss": 4.1132, "step": 5162 }, { "epoch": 0.05163, "grad_norm": 0.6389112592034603, "learning_rate": 0.003, "loss": 4.1355, "step": 5163 }, { "epoch": 0.05164, "grad_norm": 0.644621023044623, "learning_rate": 0.003, "loss": 4.1119, "step": 5164 }, { "epoch": 0.05165, "grad_norm": 0.6418172439331881, "learning_rate": 0.003, "loss": 4.1297, "step": 5165 }, { "epoch": 0.05166, "grad_norm": 0.6129087318474767, "learning_rate": 0.003, "loss": 4.1266, "step": 5166 }, { "epoch": 0.05167, "grad_norm": 0.6032353120193177, "learning_rate": 0.003, "loss": 4.1341, "step": 5167 }, { "epoch": 0.05168, "grad_norm": 0.5247219196682845, "learning_rate": 0.003, "loss": 4.1612, "step": 5168 }, { "epoch": 0.05169, "grad_norm": 0.5175916837961249, "learning_rate": 0.003, "loss": 4.1113, "step": 5169 }, { "epoch": 0.0517, "grad_norm": 0.6351381415951768, "learning_rate": 0.003, "loss": 4.0807, "step": 5170 }, { "epoch": 0.05171, "grad_norm": 0.9707906373263301, "learning_rate": 0.003, "loss": 4.121, "step": 5171 }, { "epoch": 0.05172, "grad_norm": 1.3871012373265488, "learning_rate": 0.003, "loss": 4.1567, "step": 5172 }, { "epoch": 0.05173, "grad_norm": 0.6859878951673775, "learning_rate": 0.003, "loss": 4.1484, "step": 5173 }, { "epoch": 0.05174, "grad_norm": 0.688770612830069, "learning_rate": 0.003, "loss": 4.1416, "step": 5174 }, { "epoch": 0.05175, "grad_norm": 0.7348674861956664, "learning_rate": 0.003, "loss": 4.151, "step": 5175 }, { "epoch": 0.05176, "grad_norm": 0.7293852449611181, "learning_rate": 0.003, "loss": 4.1142, "step": 5176 }, { "epoch": 0.05177, "grad_norm": 0.6564692961913474, "learning_rate": 0.003, "loss": 4.1059, "step": 5177 }, { "epoch": 0.05178, "grad_norm": 0.6743163086709401, "learning_rate": 0.003, "loss": 4.104, "step": 5178 }, { "epoch": 0.05179, "grad_norm": 0.7124422218567592, "learning_rate": 0.003, "loss": 4.1241, "step": 5179 }, { "epoch": 0.0518, "grad_norm": 0.7927407152562181, "learning_rate": 0.003, "loss": 4.1251, "step": 5180 }, { "epoch": 0.05181, "grad_norm": 0.7257533748695947, "learning_rate": 0.003, "loss": 4.1324, "step": 5181 }, { "epoch": 0.05182, "grad_norm": 0.6857538651077083, "learning_rate": 0.003, "loss": 4.1461, "step": 5182 }, { "epoch": 0.05183, "grad_norm": 0.6165169903841881, "learning_rate": 0.003, "loss": 4.1098, "step": 5183 }, { "epoch": 0.05184, "grad_norm": 0.5646217873608306, "learning_rate": 0.003, "loss": 4.1087, "step": 5184 }, { "epoch": 0.05185, "grad_norm": 0.6765417920434729, "learning_rate": 0.003, "loss": 4.1155, "step": 5185 }, { "epoch": 0.05186, "grad_norm": 0.763739343016591, "learning_rate": 0.003, "loss": 4.1271, "step": 5186 }, { "epoch": 0.05187, "grad_norm": 0.8133701961716747, "learning_rate": 0.003, "loss": 4.1673, "step": 5187 }, { "epoch": 0.05188, "grad_norm": 0.9136528986209898, "learning_rate": 0.003, "loss": 4.1414, "step": 5188 }, { "epoch": 0.05189, "grad_norm": 1.1309518830748815, "learning_rate": 0.003, "loss": 4.1235, "step": 5189 }, { "epoch": 0.0519, "grad_norm": 1.1084320754378056, "learning_rate": 0.003, "loss": 4.1316, "step": 5190 }, { "epoch": 0.05191, "grad_norm": 0.7822117289248245, "learning_rate": 0.003, "loss": 4.1232, "step": 5191 }, { "epoch": 0.05192, "grad_norm": 0.6054235823299604, "learning_rate": 0.003, "loss": 4.1413, "step": 5192 }, { "epoch": 0.05193, "grad_norm": 0.7272923940874335, "learning_rate": 0.003, "loss": 4.1392, "step": 5193 }, { "epoch": 0.05194, "grad_norm": 1.053286129123416, "learning_rate": 0.003, "loss": 4.1104, "step": 5194 }, { "epoch": 0.05195, "grad_norm": 1.0774389375766364, "learning_rate": 0.003, "loss": 4.1542, "step": 5195 }, { "epoch": 0.05196, "grad_norm": 0.7935135429431, "learning_rate": 0.003, "loss": 4.1262, "step": 5196 }, { "epoch": 0.05197, "grad_norm": 0.6765899030219741, "learning_rate": 0.003, "loss": 4.146, "step": 5197 }, { "epoch": 0.05198, "grad_norm": 0.7288470154808702, "learning_rate": 0.003, "loss": 4.1197, "step": 5198 }, { "epoch": 0.05199, "grad_norm": 0.7762676094179997, "learning_rate": 0.003, "loss": 4.1066, "step": 5199 }, { "epoch": 0.052, "grad_norm": 0.8517147173377261, "learning_rate": 0.003, "loss": 4.1432, "step": 5200 }, { "epoch": 0.05201, "grad_norm": 0.7812345967785519, "learning_rate": 0.003, "loss": 4.1391, "step": 5201 }, { "epoch": 0.05202, "grad_norm": 0.7915734925208033, "learning_rate": 0.003, "loss": 4.1227, "step": 5202 }, { "epoch": 0.05203, "grad_norm": 0.779004263274718, "learning_rate": 0.003, "loss": 4.1542, "step": 5203 }, { "epoch": 0.05204, "grad_norm": 0.8440004396702558, "learning_rate": 0.003, "loss": 4.143, "step": 5204 }, { "epoch": 0.05205, "grad_norm": 0.8044224640384809, "learning_rate": 0.003, "loss": 4.1498, "step": 5205 }, { "epoch": 0.05206, "grad_norm": 0.830818456670086, "learning_rate": 0.003, "loss": 4.1336, "step": 5206 }, { "epoch": 0.05207, "grad_norm": 0.840221891416657, "learning_rate": 0.003, "loss": 4.1185, "step": 5207 }, { "epoch": 0.05208, "grad_norm": 0.8228927798697641, "learning_rate": 0.003, "loss": 4.1353, "step": 5208 }, { "epoch": 0.05209, "grad_norm": 0.7538723463798628, "learning_rate": 0.003, "loss": 4.1278, "step": 5209 }, { "epoch": 0.0521, "grad_norm": 0.7269914660347427, "learning_rate": 0.003, "loss": 4.0982, "step": 5210 }, { "epoch": 0.05211, "grad_norm": 0.6396845788398585, "learning_rate": 0.003, "loss": 4.121, "step": 5211 }, { "epoch": 0.05212, "grad_norm": 0.6578986704720922, "learning_rate": 0.003, "loss": 4.1682, "step": 5212 }, { "epoch": 0.05213, "grad_norm": 0.6665917425071297, "learning_rate": 0.003, "loss": 4.124, "step": 5213 }, { "epoch": 0.05214, "grad_norm": 0.6696745228858743, "learning_rate": 0.003, "loss": 4.1155, "step": 5214 }, { "epoch": 0.05215, "grad_norm": 0.8360803674253388, "learning_rate": 0.003, "loss": 4.1145, "step": 5215 }, { "epoch": 0.05216, "grad_norm": 1.0234029588407536, "learning_rate": 0.003, "loss": 4.1326, "step": 5216 }, { "epoch": 0.05217, "grad_norm": 0.8990465132650213, "learning_rate": 0.003, "loss": 4.1318, "step": 5217 }, { "epoch": 0.05218, "grad_norm": 0.7789964318595968, "learning_rate": 0.003, "loss": 4.1198, "step": 5218 }, { "epoch": 0.05219, "grad_norm": 0.7298439479090568, "learning_rate": 0.003, "loss": 4.1481, "step": 5219 }, { "epoch": 0.0522, "grad_norm": 0.8002872146937169, "learning_rate": 0.003, "loss": 4.115, "step": 5220 }, { "epoch": 0.05221, "grad_norm": 0.8077845914697496, "learning_rate": 0.003, "loss": 4.1263, "step": 5221 }, { "epoch": 0.05222, "grad_norm": 0.8937695724547757, "learning_rate": 0.003, "loss": 4.1376, "step": 5222 }, { "epoch": 0.05223, "grad_norm": 0.8660989097279149, "learning_rate": 0.003, "loss": 4.1496, "step": 5223 }, { "epoch": 0.05224, "grad_norm": 0.8551329970671594, "learning_rate": 0.003, "loss": 4.1288, "step": 5224 }, { "epoch": 0.05225, "grad_norm": 0.7915686333783567, "learning_rate": 0.003, "loss": 4.1505, "step": 5225 }, { "epoch": 0.05226, "grad_norm": 0.8295912864972166, "learning_rate": 0.003, "loss": 4.1414, "step": 5226 }, { "epoch": 0.05227, "grad_norm": 0.7362810272638042, "learning_rate": 0.003, "loss": 4.1637, "step": 5227 }, { "epoch": 0.05228, "grad_norm": 0.7001627229979629, "learning_rate": 0.003, "loss": 4.1432, "step": 5228 }, { "epoch": 0.05229, "grad_norm": 0.7602202740566725, "learning_rate": 0.003, "loss": 4.1086, "step": 5229 }, { "epoch": 0.0523, "grad_norm": 0.6912471723136686, "learning_rate": 0.003, "loss": 4.0986, "step": 5230 }, { "epoch": 0.05231, "grad_norm": 0.7018975883307987, "learning_rate": 0.003, "loss": 4.1343, "step": 5231 }, { "epoch": 0.05232, "grad_norm": 0.717854420616106, "learning_rate": 0.003, "loss": 4.1242, "step": 5232 }, { "epoch": 0.05233, "grad_norm": 0.7712695717441904, "learning_rate": 0.003, "loss": 4.1086, "step": 5233 }, { "epoch": 0.05234, "grad_norm": 0.8385680879325836, "learning_rate": 0.003, "loss": 4.1626, "step": 5234 }, { "epoch": 0.05235, "grad_norm": 0.9750232594843035, "learning_rate": 0.003, "loss": 4.1435, "step": 5235 }, { "epoch": 0.05236, "grad_norm": 0.8446359663530114, "learning_rate": 0.003, "loss": 4.1204, "step": 5236 }, { "epoch": 0.05237, "grad_norm": 0.9096810630918026, "learning_rate": 0.003, "loss": 4.1267, "step": 5237 }, { "epoch": 0.05238, "grad_norm": 0.9079420042262413, "learning_rate": 0.003, "loss": 4.1189, "step": 5238 }, { "epoch": 0.05239, "grad_norm": 0.8141480866664513, "learning_rate": 0.003, "loss": 4.1388, "step": 5239 }, { "epoch": 0.0524, "grad_norm": 0.7056408832787606, "learning_rate": 0.003, "loss": 4.1448, "step": 5240 }, { "epoch": 0.05241, "grad_norm": 0.6654549467738141, "learning_rate": 0.003, "loss": 4.1475, "step": 5241 }, { "epoch": 0.05242, "grad_norm": 0.7294744123105587, "learning_rate": 0.003, "loss": 4.1449, "step": 5242 }, { "epoch": 0.05243, "grad_norm": 0.7203093785089144, "learning_rate": 0.003, "loss": 4.1366, "step": 5243 }, { "epoch": 0.05244, "grad_norm": 0.6894002827090067, "learning_rate": 0.003, "loss": 4.1125, "step": 5244 }, { "epoch": 0.05245, "grad_norm": 0.6674737328058699, "learning_rate": 0.003, "loss": 4.1193, "step": 5245 }, { "epoch": 0.05246, "grad_norm": 0.6789552091924481, "learning_rate": 0.003, "loss": 4.1241, "step": 5246 }, { "epoch": 0.05247, "grad_norm": 0.7027379397907318, "learning_rate": 0.003, "loss": 4.1241, "step": 5247 }, { "epoch": 0.05248, "grad_norm": 0.7672915233226524, "learning_rate": 0.003, "loss": 4.1316, "step": 5248 }, { "epoch": 0.05249, "grad_norm": 0.8600082125105806, "learning_rate": 0.003, "loss": 4.1487, "step": 5249 }, { "epoch": 0.0525, "grad_norm": 0.8053204409517745, "learning_rate": 0.003, "loss": 4.1378, "step": 5250 }, { "epoch": 0.05251, "grad_norm": 0.7017613490500446, "learning_rate": 0.003, "loss": 4.1246, "step": 5251 }, { "epoch": 0.05252, "grad_norm": 0.6533051938670863, "learning_rate": 0.003, "loss": 4.1484, "step": 5252 }, { "epoch": 0.05253, "grad_norm": 0.6002345144376094, "learning_rate": 0.003, "loss": 4.1137, "step": 5253 }, { "epoch": 0.05254, "grad_norm": 0.5673372560372355, "learning_rate": 0.003, "loss": 4.0996, "step": 5254 }, { "epoch": 0.05255, "grad_norm": 0.5743257458749801, "learning_rate": 0.003, "loss": 4.1015, "step": 5255 }, { "epoch": 0.05256, "grad_norm": 0.5639251305320502, "learning_rate": 0.003, "loss": 4.1283, "step": 5256 }, { "epoch": 0.05257, "grad_norm": 0.5046284771633743, "learning_rate": 0.003, "loss": 4.1166, "step": 5257 }, { "epoch": 0.05258, "grad_norm": 0.474168541210226, "learning_rate": 0.003, "loss": 4.1167, "step": 5258 }, { "epoch": 0.05259, "grad_norm": 0.47547400835554277, "learning_rate": 0.003, "loss": 4.1012, "step": 5259 }, { "epoch": 0.0526, "grad_norm": 0.5164451968626312, "learning_rate": 0.003, "loss": 4.1303, "step": 5260 }, { "epoch": 0.05261, "grad_norm": 0.5250991374606924, "learning_rate": 0.003, "loss": 4.0976, "step": 5261 }, { "epoch": 0.05262, "grad_norm": 0.6851820393823259, "learning_rate": 0.003, "loss": 4.0927, "step": 5262 }, { "epoch": 0.05263, "grad_norm": 0.9977081433022779, "learning_rate": 0.003, "loss": 4.1511, "step": 5263 }, { "epoch": 0.05264, "grad_norm": 1.268054358654276, "learning_rate": 0.003, "loss": 4.159, "step": 5264 }, { "epoch": 0.05265, "grad_norm": 0.5767306016131388, "learning_rate": 0.003, "loss": 4.1057, "step": 5265 }, { "epoch": 0.05266, "grad_norm": 0.7952501578678153, "learning_rate": 0.003, "loss": 4.0961, "step": 5266 }, { "epoch": 0.05267, "grad_norm": 1.0070088823226309, "learning_rate": 0.003, "loss": 4.152, "step": 5267 }, { "epoch": 0.05268, "grad_norm": 0.9932744688310504, "learning_rate": 0.003, "loss": 4.1218, "step": 5268 }, { "epoch": 0.05269, "grad_norm": 1.0388601409327192, "learning_rate": 0.003, "loss": 4.1029, "step": 5269 }, { "epoch": 0.0527, "grad_norm": 0.956313275292693, "learning_rate": 0.003, "loss": 4.1371, "step": 5270 }, { "epoch": 0.05271, "grad_norm": 1.0292710271964594, "learning_rate": 0.003, "loss": 4.1075, "step": 5271 }, { "epoch": 0.05272, "grad_norm": 0.8930084447233932, "learning_rate": 0.003, "loss": 4.1636, "step": 5272 }, { "epoch": 0.05273, "grad_norm": 0.9405712293541173, "learning_rate": 0.003, "loss": 4.1478, "step": 5273 }, { "epoch": 0.05274, "grad_norm": 1.0695246435294683, "learning_rate": 0.003, "loss": 4.1665, "step": 5274 }, { "epoch": 0.05275, "grad_norm": 0.9058378894957746, "learning_rate": 0.003, "loss": 4.135, "step": 5275 }, { "epoch": 0.05276, "grad_norm": 0.8493287470524594, "learning_rate": 0.003, "loss": 4.1264, "step": 5276 }, { "epoch": 0.05277, "grad_norm": 0.9420450553310487, "learning_rate": 0.003, "loss": 4.1198, "step": 5277 }, { "epoch": 0.05278, "grad_norm": 1.1979114145894763, "learning_rate": 0.003, "loss": 4.1498, "step": 5278 }, { "epoch": 0.05279, "grad_norm": 0.9851879183907458, "learning_rate": 0.003, "loss": 4.1443, "step": 5279 }, { "epoch": 0.0528, "grad_norm": 0.9906333358429689, "learning_rate": 0.003, "loss": 4.1431, "step": 5280 }, { "epoch": 0.05281, "grad_norm": 1.0018246671795275, "learning_rate": 0.003, "loss": 4.1561, "step": 5281 }, { "epoch": 0.05282, "grad_norm": 0.9088563498086558, "learning_rate": 0.003, "loss": 4.1483, "step": 5282 }, { "epoch": 0.05283, "grad_norm": 0.8255004872484286, "learning_rate": 0.003, "loss": 4.1314, "step": 5283 }, { "epoch": 0.05284, "grad_norm": 0.7395222306618988, "learning_rate": 0.003, "loss": 4.1586, "step": 5284 }, { "epoch": 0.05285, "grad_norm": 0.7519757069394422, "learning_rate": 0.003, "loss": 4.1084, "step": 5285 }, { "epoch": 0.05286, "grad_norm": 0.8309929954214635, "learning_rate": 0.003, "loss": 4.1763, "step": 5286 }, { "epoch": 0.05287, "grad_norm": 0.8541420130148721, "learning_rate": 0.003, "loss": 4.1453, "step": 5287 }, { "epoch": 0.05288, "grad_norm": 0.9074648300179621, "learning_rate": 0.003, "loss": 4.1262, "step": 5288 }, { "epoch": 0.05289, "grad_norm": 0.934169321459613, "learning_rate": 0.003, "loss": 4.1567, "step": 5289 }, { "epoch": 0.0529, "grad_norm": 0.8614434047866291, "learning_rate": 0.003, "loss": 4.1499, "step": 5290 }, { "epoch": 0.05291, "grad_norm": 0.7851323632915874, "learning_rate": 0.003, "loss": 4.1592, "step": 5291 }, { "epoch": 0.05292, "grad_norm": 0.8846666231031659, "learning_rate": 0.003, "loss": 4.1535, "step": 5292 }, { "epoch": 0.05293, "grad_norm": 0.8272989171480324, "learning_rate": 0.003, "loss": 4.151, "step": 5293 }, { "epoch": 0.05294, "grad_norm": 0.7925051182542472, "learning_rate": 0.003, "loss": 4.1348, "step": 5294 }, { "epoch": 0.05295, "grad_norm": 0.8458687864133015, "learning_rate": 0.003, "loss": 4.1485, "step": 5295 }, { "epoch": 0.05296, "grad_norm": 0.7329229801657224, "learning_rate": 0.003, "loss": 4.1264, "step": 5296 }, { "epoch": 0.05297, "grad_norm": 0.590148048628958, "learning_rate": 0.003, "loss": 4.1241, "step": 5297 }, { "epoch": 0.05298, "grad_norm": 0.5633959457343373, "learning_rate": 0.003, "loss": 4.1403, "step": 5298 }, { "epoch": 0.05299, "grad_norm": 0.6047041287435936, "learning_rate": 0.003, "loss": 4.1167, "step": 5299 }, { "epoch": 0.053, "grad_norm": 0.6026083589407333, "learning_rate": 0.003, "loss": 4.1334, "step": 5300 }, { "epoch": 0.05301, "grad_norm": 0.6614621931672765, "learning_rate": 0.003, "loss": 4.1486, "step": 5301 }, { "epoch": 0.05302, "grad_norm": 0.667460840133991, "learning_rate": 0.003, "loss": 4.1218, "step": 5302 }, { "epoch": 0.05303, "grad_norm": 0.7178043539507469, "learning_rate": 0.003, "loss": 4.1337, "step": 5303 }, { "epoch": 0.05304, "grad_norm": 0.7057622216619723, "learning_rate": 0.003, "loss": 4.1228, "step": 5304 }, { "epoch": 0.05305, "grad_norm": 0.6395996129884405, "learning_rate": 0.003, "loss": 4.1282, "step": 5305 }, { "epoch": 0.05306, "grad_norm": 0.6595661147403964, "learning_rate": 0.003, "loss": 4.1362, "step": 5306 }, { "epoch": 0.05307, "grad_norm": 0.5697470297170749, "learning_rate": 0.003, "loss": 4.1297, "step": 5307 }, { "epoch": 0.05308, "grad_norm": 0.6269529857264409, "learning_rate": 0.003, "loss": 4.1412, "step": 5308 }, { "epoch": 0.05309, "grad_norm": 0.7109602470701628, "learning_rate": 0.003, "loss": 4.1076, "step": 5309 }, { "epoch": 0.0531, "grad_norm": 0.8779710810213476, "learning_rate": 0.003, "loss": 4.1214, "step": 5310 }, { "epoch": 0.05311, "grad_norm": 0.9865994447177229, "learning_rate": 0.003, "loss": 4.161, "step": 5311 }, { "epoch": 0.05312, "grad_norm": 0.928167243969855, "learning_rate": 0.003, "loss": 4.1182, "step": 5312 }, { "epoch": 0.05313, "grad_norm": 0.65760273068672, "learning_rate": 0.003, "loss": 4.0955, "step": 5313 }, { "epoch": 0.05314, "grad_norm": 0.6095510092870687, "learning_rate": 0.003, "loss": 4.1245, "step": 5314 }, { "epoch": 0.05315, "grad_norm": 0.7424516970237663, "learning_rate": 0.003, "loss": 4.1331, "step": 5315 }, { "epoch": 0.05316, "grad_norm": 0.8440091626661397, "learning_rate": 0.003, "loss": 4.1369, "step": 5316 }, { "epoch": 0.05317, "grad_norm": 0.9378368997983185, "learning_rate": 0.003, "loss": 4.1418, "step": 5317 }, { "epoch": 0.05318, "grad_norm": 0.8048248745024449, "learning_rate": 0.003, "loss": 4.108, "step": 5318 }, { "epoch": 0.05319, "grad_norm": 0.7236000931693785, "learning_rate": 0.003, "loss": 4.1133, "step": 5319 }, { "epoch": 0.0532, "grad_norm": 0.6461031092410674, "learning_rate": 0.003, "loss": 4.1188, "step": 5320 }, { "epoch": 0.05321, "grad_norm": 0.7679854561007805, "learning_rate": 0.003, "loss": 4.1233, "step": 5321 }, { "epoch": 0.05322, "grad_norm": 0.7986946965085385, "learning_rate": 0.003, "loss": 4.1448, "step": 5322 }, { "epoch": 0.05323, "grad_norm": 0.7153375585884025, "learning_rate": 0.003, "loss": 4.1679, "step": 5323 }, { "epoch": 0.05324, "grad_norm": 0.749330813072559, "learning_rate": 0.003, "loss": 4.1102, "step": 5324 }, { "epoch": 0.05325, "grad_norm": 0.832758253039198, "learning_rate": 0.003, "loss": 4.1332, "step": 5325 }, { "epoch": 0.05326, "grad_norm": 0.7759697554212088, "learning_rate": 0.003, "loss": 4.1525, "step": 5326 }, { "epoch": 0.05327, "grad_norm": 0.7232266024198855, "learning_rate": 0.003, "loss": 4.1297, "step": 5327 }, { "epoch": 0.05328, "grad_norm": 0.7836949776849659, "learning_rate": 0.003, "loss": 4.1406, "step": 5328 }, { "epoch": 0.05329, "grad_norm": 0.7124265190034871, "learning_rate": 0.003, "loss": 4.1226, "step": 5329 }, { "epoch": 0.0533, "grad_norm": 0.7073201953779398, "learning_rate": 0.003, "loss": 4.1422, "step": 5330 }, { "epoch": 0.05331, "grad_norm": 0.6517329606524963, "learning_rate": 0.003, "loss": 4.1286, "step": 5331 }, { "epoch": 0.05332, "grad_norm": 0.7675020790673767, "learning_rate": 0.003, "loss": 4.1486, "step": 5332 }, { "epoch": 0.05333, "grad_norm": 0.8204477024627048, "learning_rate": 0.003, "loss": 4.1252, "step": 5333 }, { "epoch": 0.05334, "grad_norm": 0.7022074746197576, "learning_rate": 0.003, "loss": 4.108, "step": 5334 }, { "epoch": 0.05335, "grad_norm": 0.6275459710277902, "learning_rate": 0.003, "loss": 4.1391, "step": 5335 }, { "epoch": 0.05336, "grad_norm": 0.6160252429147814, "learning_rate": 0.003, "loss": 4.147, "step": 5336 }, { "epoch": 0.05337, "grad_norm": 0.6645784185963264, "learning_rate": 0.003, "loss": 4.1336, "step": 5337 }, { "epoch": 0.05338, "grad_norm": 0.755147688591901, "learning_rate": 0.003, "loss": 4.1201, "step": 5338 }, { "epoch": 0.05339, "grad_norm": 0.8974685697024821, "learning_rate": 0.003, "loss": 4.1117, "step": 5339 }, { "epoch": 0.0534, "grad_norm": 1.0247940652403054, "learning_rate": 0.003, "loss": 4.1358, "step": 5340 }, { "epoch": 0.05341, "grad_norm": 0.8398376000828047, "learning_rate": 0.003, "loss": 4.136, "step": 5341 }, { "epoch": 0.05342, "grad_norm": 0.6251041860327448, "learning_rate": 0.003, "loss": 4.1176, "step": 5342 }, { "epoch": 0.05343, "grad_norm": 0.6653717662444403, "learning_rate": 0.003, "loss": 4.1344, "step": 5343 }, { "epoch": 0.05344, "grad_norm": 0.7626824156057184, "learning_rate": 0.003, "loss": 4.0965, "step": 5344 }, { "epoch": 0.05345, "grad_norm": 0.7412027124944542, "learning_rate": 0.003, "loss": 4.1092, "step": 5345 }, { "epoch": 0.05346, "grad_norm": 0.8204551363999621, "learning_rate": 0.003, "loss": 4.1275, "step": 5346 }, { "epoch": 0.05347, "grad_norm": 0.8691311780811011, "learning_rate": 0.003, "loss": 4.1195, "step": 5347 }, { "epoch": 0.05348, "grad_norm": 0.8297575781225915, "learning_rate": 0.003, "loss": 4.1173, "step": 5348 }, { "epoch": 0.05349, "grad_norm": 0.7800308176994237, "learning_rate": 0.003, "loss": 4.1248, "step": 5349 }, { "epoch": 0.0535, "grad_norm": 0.7428195062452232, "learning_rate": 0.003, "loss": 4.1134, "step": 5350 }, { "epoch": 0.05351, "grad_norm": 0.698577766705363, "learning_rate": 0.003, "loss": 4.1171, "step": 5351 }, { "epoch": 0.05352, "grad_norm": 0.7463247671330128, "learning_rate": 0.003, "loss": 4.114, "step": 5352 }, { "epoch": 0.05353, "grad_norm": 0.8427380940568103, "learning_rate": 0.003, "loss": 4.1292, "step": 5353 }, { "epoch": 0.05354, "grad_norm": 0.8580345957059121, "learning_rate": 0.003, "loss": 4.1189, "step": 5354 }, { "epoch": 0.05355, "grad_norm": 0.7981948995131939, "learning_rate": 0.003, "loss": 4.1077, "step": 5355 }, { "epoch": 0.05356, "grad_norm": 0.9987056987093543, "learning_rate": 0.003, "loss": 4.1536, "step": 5356 }, { "epoch": 0.05357, "grad_norm": 1.0863630771998207, "learning_rate": 0.003, "loss": 4.1449, "step": 5357 }, { "epoch": 0.05358, "grad_norm": 0.8873987298102782, "learning_rate": 0.003, "loss": 4.1316, "step": 5358 }, { "epoch": 0.05359, "grad_norm": 0.8982951690299781, "learning_rate": 0.003, "loss": 4.1411, "step": 5359 }, { "epoch": 0.0536, "grad_norm": 0.7420851849216799, "learning_rate": 0.003, "loss": 4.1329, "step": 5360 }, { "epoch": 0.05361, "grad_norm": 0.6478690518338635, "learning_rate": 0.003, "loss": 4.1369, "step": 5361 }, { "epoch": 0.05362, "grad_norm": 0.7363423400186474, "learning_rate": 0.003, "loss": 4.1501, "step": 5362 }, { "epoch": 0.05363, "grad_norm": 0.8098849889259154, "learning_rate": 0.003, "loss": 4.1558, "step": 5363 }, { "epoch": 0.05364, "grad_norm": 0.8901440262592046, "learning_rate": 0.003, "loss": 4.152, "step": 5364 }, { "epoch": 0.05365, "grad_norm": 1.0380282918537116, "learning_rate": 0.003, "loss": 4.1511, "step": 5365 }, { "epoch": 0.05366, "grad_norm": 1.0788058766201782, "learning_rate": 0.003, "loss": 4.1375, "step": 5366 }, { "epoch": 0.05367, "grad_norm": 0.7608583479176579, "learning_rate": 0.003, "loss": 4.1254, "step": 5367 }, { "epoch": 0.05368, "grad_norm": 0.5600149243746851, "learning_rate": 0.003, "loss": 4.1146, "step": 5368 }, { "epoch": 0.05369, "grad_norm": 0.5618528481117717, "learning_rate": 0.003, "loss": 4.1273, "step": 5369 }, { "epoch": 0.0537, "grad_norm": 0.6773018479702052, "learning_rate": 0.003, "loss": 4.1287, "step": 5370 }, { "epoch": 0.05371, "grad_norm": 0.8077481499415716, "learning_rate": 0.003, "loss": 4.1506, "step": 5371 }, { "epoch": 0.05372, "grad_norm": 0.9098981004391098, "learning_rate": 0.003, "loss": 4.1303, "step": 5372 }, { "epoch": 0.05373, "grad_norm": 0.8688252009595805, "learning_rate": 0.003, "loss": 4.1478, "step": 5373 }, { "epoch": 0.05374, "grad_norm": 0.7822281391029792, "learning_rate": 0.003, "loss": 4.1242, "step": 5374 }, { "epoch": 0.05375, "grad_norm": 0.6393823720543013, "learning_rate": 0.003, "loss": 4.1167, "step": 5375 }, { "epoch": 0.05376, "grad_norm": 0.6297681205873783, "learning_rate": 0.003, "loss": 4.1424, "step": 5376 }, { "epoch": 0.05377, "grad_norm": 0.5591158983975534, "learning_rate": 0.003, "loss": 4.1287, "step": 5377 }, { "epoch": 0.05378, "grad_norm": 0.5827833277734367, "learning_rate": 0.003, "loss": 4.1316, "step": 5378 }, { "epoch": 0.05379, "grad_norm": 0.5267920451244182, "learning_rate": 0.003, "loss": 4.1356, "step": 5379 }, { "epoch": 0.0538, "grad_norm": 0.5153469929794081, "learning_rate": 0.003, "loss": 4.1031, "step": 5380 }, { "epoch": 0.05381, "grad_norm": 0.541236643808538, "learning_rate": 0.003, "loss": 4.115, "step": 5381 }, { "epoch": 0.05382, "grad_norm": 0.5524397786242985, "learning_rate": 0.003, "loss": 4.1335, "step": 5382 }, { "epoch": 0.05383, "grad_norm": 0.5171016444028097, "learning_rate": 0.003, "loss": 4.1294, "step": 5383 }, { "epoch": 0.05384, "grad_norm": 0.4322886075458978, "learning_rate": 0.003, "loss": 4.1065, "step": 5384 }, { "epoch": 0.05385, "grad_norm": 0.45234091719568803, "learning_rate": 0.003, "loss": 4.1332, "step": 5385 }, { "epoch": 0.05386, "grad_norm": 0.4669069348140255, "learning_rate": 0.003, "loss": 4.1133, "step": 5386 }, { "epoch": 0.05387, "grad_norm": 0.505314292670269, "learning_rate": 0.003, "loss": 4.0983, "step": 5387 }, { "epoch": 0.05388, "grad_norm": 0.6661928342962524, "learning_rate": 0.003, "loss": 4.1388, "step": 5388 }, { "epoch": 0.05389, "grad_norm": 1.0017958320911928, "learning_rate": 0.003, "loss": 4.1206, "step": 5389 }, { "epoch": 0.0539, "grad_norm": 1.2992679013311437, "learning_rate": 0.003, "loss": 4.1067, "step": 5390 }, { "epoch": 0.05391, "grad_norm": 0.5736546179541971, "learning_rate": 0.003, "loss": 4.1074, "step": 5391 }, { "epoch": 0.05392, "grad_norm": 0.7440996442059347, "learning_rate": 0.003, "loss": 4.1139, "step": 5392 }, { "epoch": 0.05393, "grad_norm": 0.9950521432118898, "learning_rate": 0.003, "loss": 4.1497, "step": 5393 }, { "epoch": 0.05394, "grad_norm": 0.7875014530755975, "learning_rate": 0.003, "loss": 4.1182, "step": 5394 }, { "epoch": 0.05395, "grad_norm": 0.7254081418838102, "learning_rate": 0.003, "loss": 4.1479, "step": 5395 }, { "epoch": 0.05396, "grad_norm": 0.8612136110888624, "learning_rate": 0.003, "loss": 4.143, "step": 5396 }, { "epoch": 0.05397, "grad_norm": 0.8986673711381578, "learning_rate": 0.003, "loss": 4.122, "step": 5397 }, { "epoch": 0.05398, "grad_norm": 0.778809786786069, "learning_rate": 0.003, "loss": 4.1161, "step": 5398 }, { "epoch": 0.05399, "grad_norm": 0.7995244546793957, "learning_rate": 0.003, "loss": 4.1442, "step": 5399 }, { "epoch": 0.054, "grad_norm": 0.7896689616881526, "learning_rate": 0.003, "loss": 4.1557, "step": 5400 }, { "epoch": 0.05401, "grad_norm": 0.7804724282282718, "learning_rate": 0.003, "loss": 4.1348, "step": 5401 }, { "epoch": 0.05402, "grad_norm": 0.7709619532218206, "learning_rate": 0.003, "loss": 4.1489, "step": 5402 }, { "epoch": 0.05403, "grad_norm": 0.7494119205086632, "learning_rate": 0.003, "loss": 4.133, "step": 5403 }, { "epoch": 0.05404, "grad_norm": 0.7695850697537835, "learning_rate": 0.003, "loss": 4.128, "step": 5404 }, { "epoch": 0.05405, "grad_norm": 0.6494919985184457, "learning_rate": 0.003, "loss": 4.1251, "step": 5405 }, { "epoch": 0.05406, "grad_norm": 0.7107256413996718, "learning_rate": 0.003, "loss": 4.1092, "step": 5406 }, { "epoch": 0.05407, "grad_norm": 0.8097891472467551, "learning_rate": 0.003, "loss": 4.1255, "step": 5407 }, { "epoch": 0.05408, "grad_norm": 0.9502406141177873, "learning_rate": 0.003, "loss": 4.1168, "step": 5408 }, { "epoch": 0.05409, "grad_norm": 0.9823219241510378, "learning_rate": 0.003, "loss": 4.0894, "step": 5409 }, { "epoch": 0.0541, "grad_norm": 0.878184005124274, "learning_rate": 0.003, "loss": 4.1409, "step": 5410 }, { "epoch": 0.05411, "grad_norm": 0.783228910710241, "learning_rate": 0.003, "loss": 4.1364, "step": 5411 }, { "epoch": 0.05412, "grad_norm": 0.7419337388906813, "learning_rate": 0.003, "loss": 4.1309, "step": 5412 }, { "epoch": 0.05413, "grad_norm": 0.8677947411304687, "learning_rate": 0.003, "loss": 4.1314, "step": 5413 }, { "epoch": 0.05414, "grad_norm": 0.754625111832618, "learning_rate": 0.003, "loss": 4.1077, "step": 5414 }, { "epoch": 0.05415, "grad_norm": 0.6166571551752229, "learning_rate": 0.003, "loss": 4.1407, "step": 5415 }, { "epoch": 0.05416, "grad_norm": 0.6149867683651459, "learning_rate": 0.003, "loss": 4.0953, "step": 5416 }, { "epoch": 0.05417, "grad_norm": 0.7707506569643702, "learning_rate": 0.003, "loss": 4.1237, "step": 5417 }, { "epoch": 0.05418, "grad_norm": 0.8827482178277591, "learning_rate": 0.003, "loss": 4.12, "step": 5418 }, { "epoch": 0.05419, "grad_norm": 1.0696377805286104, "learning_rate": 0.003, "loss": 4.1453, "step": 5419 }, { "epoch": 0.0542, "grad_norm": 1.0762623846918855, "learning_rate": 0.003, "loss": 4.1305, "step": 5420 }, { "epoch": 0.05421, "grad_norm": 1.0370239550620723, "learning_rate": 0.003, "loss": 4.1296, "step": 5421 }, { "epoch": 0.05422, "grad_norm": 0.8836609588737956, "learning_rate": 0.003, "loss": 4.1453, "step": 5422 }, { "epoch": 0.05423, "grad_norm": 0.9202200994596317, "learning_rate": 0.003, "loss": 4.1371, "step": 5423 }, { "epoch": 0.05424, "grad_norm": 0.8512778459324802, "learning_rate": 0.003, "loss": 4.1658, "step": 5424 }, { "epoch": 0.05425, "grad_norm": 0.9221497425172506, "learning_rate": 0.003, "loss": 4.1457, "step": 5425 }, { "epoch": 0.05426, "grad_norm": 0.8858038824729005, "learning_rate": 0.003, "loss": 4.1296, "step": 5426 }, { "epoch": 0.05427, "grad_norm": 0.8461598509812891, "learning_rate": 0.003, "loss": 4.1252, "step": 5427 }, { "epoch": 0.05428, "grad_norm": 0.7838983529721615, "learning_rate": 0.003, "loss": 4.1428, "step": 5428 }, { "epoch": 0.05429, "grad_norm": 0.7381870936331452, "learning_rate": 0.003, "loss": 4.1432, "step": 5429 }, { "epoch": 0.0543, "grad_norm": 0.6675438001202542, "learning_rate": 0.003, "loss": 4.1583, "step": 5430 }, { "epoch": 0.05431, "grad_norm": 0.7468875444789291, "learning_rate": 0.003, "loss": 4.1301, "step": 5431 }, { "epoch": 0.05432, "grad_norm": 0.7685334970223217, "learning_rate": 0.003, "loss": 4.1248, "step": 5432 }, { "epoch": 0.05433, "grad_norm": 0.7841989897169739, "learning_rate": 0.003, "loss": 4.0999, "step": 5433 }, { "epoch": 0.05434, "grad_norm": 0.7643517687218986, "learning_rate": 0.003, "loss": 4.1241, "step": 5434 }, { "epoch": 0.05435, "grad_norm": 0.7188445789131351, "learning_rate": 0.003, "loss": 4.1133, "step": 5435 }, { "epoch": 0.05436, "grad_norm": 0.6666336203457994, "learning_rate": 0.003, "loss": 4.1339, "step": 5436 }, { "epoch": 0.05437, "grad_norm": 0.5578867371709249, "learning_rate": 0.003, "loss": 4.1353, "step": 5437 }, { "epoch": 0.05438, "grad_norm": 0.6023052773273868, "learning_rate": 0.003, "loss": 4.1276, "step": 5438 }, { "epoch": 0.05439, "grad_norm": 0.686819083498365, "learning_rate": 0.003, "loss": 4.1193, "step": 5439 }, { "epoch": 0.0544, "grad_norm": 0.7877497063451424, "learning_rate": 0.003, "loss": 4.1164, "step": 5440 }, { "epoch": 0.05441, "grad_norm": 0.8224525297404207, "learning_rate": 0.003, "loss": 4.148, "step": 5441 }, { "epoch": 0.05442, "grad_norm": 0.8963412844552312, "learning_rate": 0.003, "loss": 4.1128, "step": 5442 }, { "epoch": 0.05443, "grad_norm": 0.7843413799075878, "learning_rate": 0.003, "loss": 4.1267, "step": 5443 }, { "epoch": 0.05444, "grad_norm": 0.6677654509581011, "learning_rate": 0.003, "loss": 4.1127, "step": 5444 }, { "epoch": 0.05445, "grad_norm": 0.5372869992196785, "learning_rate": 0.003, "loss": 4.1083, "step": 5445 }, { "epoch": 0.05446, "grad_norm": 0.4853458301953657, "learning_rate": 0.003, "loss": 4.1094, "step": 5446 }, { "epoch": 0.05447, "grad_norm": 0.534235282926048, "learning_rate": 0.003, "loss": 4.1032, "step": 5447 }, { "epoch": 0.05448, "grad_norm": 0.6051573720194104, "learning_rate": 0.003, "loss": 4.1417, "step": 5448 }, { "epoch": 0.05449, "grad_norm": 0.7586494998052061, "learning_rate": 0.003, "loss": 4.1358, "step": 5449 }, { "epoch": 0.0545, "grad_norm": 0.8213010225949955, "learning_rate": 0.003, "loss": 4.1342, "step": 5450 }, { "epoch": 0.05451, "grad_norm": 0.7188380237062496, "learning_rate": 0.003, "loss": 4.1539, "step": 5451 }, { "epoch": 0.05452, "grad_norm": 0.5272820320310276, "learning_rate": 0.003, "loss": 4.1121, "step": 5452 }, { "epoch": 0.05453, "grad_norm": 0.4825898765859038, "learning_rate": 0.003, "loss": 4.1071, "step": 5453 }, { "epoch": 0.05454, "grad_norm": 0.5231707853046749, "learning_rate": 0.003, "loss": 4.1133, "step": 5454 }, { "epoch": 0.05455, "grad_norm": 0.6109918111895452, "learning_rate": 0.003, "loss": 4.1419, "step": 5455 }, { "epoch": 0.05456, "grad_norm": 0.717222959857415, "learning_rate": 0.003, "loss": 4.1169, "step": 5456 }, { "epoch": 0.05457, "grad_norm": 0.8551566509250617, "learning_rate": 0.003, "loss": 4.1442, "step": 5457 }, { "epoch": 0.05458, "grad_norm": 0.9372765867290779, "learning_rate": 0.003, "loss": 4.1202, "step": 5458 }, { "epoch": 0.05459, "grad_norm": 0.9320602167981404, "learning_rate": 0.003, "loss": 4.1324, "step": 5459 }, { "epoch": 0.0546, "grad_norm": 0.7627661268344466, "learning_rate": 0.003, "loss": 4.1183, "step": 5460 }, { "epoch": 0.05461, "grad_norm": 0.6315449710507808, "learning_rate": 0.003, "loss": 4.1204, "step": 5461 }, { "epoch": 0.05462, "grad_norm": 0.7123926231181666, "learning_rate": 0.003, "loss": 4.1466, "step": 5462 }, { "epoch": 0.05463, "grad_norm": 0.7549211607821605, "learning_rate": 0.003, "loss": 4.1081, "step": 5463 }, { "epoch": 0.05464, "grad_norm": 0.8370379354643269, "learning_rate": 0.003, "loss": 4.1503, "step": 5464 }, { "epoch": 0.05465, "grad_norm": 0.9542926928325179, "learning_rate": 0.003, "loss": 4.1515, "step": 5465 }, { "epoch": 0.05466, "grad_norm": 1.054115432669863, "learning_rate": 0.003, "loss": 4.1695, "step": 5466 }, { "epoch": 0.05467, "grad_norm": 0.8917210500728532, "learning_rate": 0.003, "loss": 4.1439, "step": 5467 }, { "epoch": 0.05468, "grad_norm": 0.8412209468493033, "learning_rate": 0.003, "loss": 4.1181, "step": 5468 }, { "epoch": 0.05469, "grad_norm": 0.7971647310022061, "learning_rate": 0.003, "loss": 4.1111, "step": 5469 }, { "epoch": 0.0547, "grad_norm": 0.8775383662555644, "learning_rate": 0.003, "loss": 4.1182, "step": 5470 }, { "epoch": 0.05471, "grad_norm": 0.9125028901592531, "learning_rate": 0.003, "loss": 4.1309, "step": 5471 }, { "epoch": 0.05472, "grad_norm": 0.7767839416762876, "learning_rate": 0.003, "loss": 4.1215, "step": 5472 }, { "epoch": 0.05473, "grad_norm": 0.7509158035092801, "learning_rate": 0.003, "loss": 4.1403, "step": 5473 }, { "epoch": 0.05474, "grad_norm": 0.8240995260680938, "learning_rate": 0.003, "loss": 4.1154, "step": 5474 }, { "epoch": 0.05475, "grad_norm": 0.9739217618215972, "learning_rate": 0.003, "loss": 4.1769, "step": 5475 }, { "epoch": 0.05476, "grad_norm": 1.0112035995282798, "learning_rate": 0.003, "loss": 4.1044, "step": 5476 }, { "epoch": 0.05477, "grad_norm": 0.8701723078990121, "learning_rate": 0.003, "loss": 4.1252, "step": 5477 }, { "epoch": 0.05478, "grad_norm": 0.745391995716312, "learning_rate": 0.003, "loss": 4.12, "step": 5478 }, { "epoch": 0.05479, "grad_norm": 0.7047469678929648, "learning_rate": 0.003, "loss": 4.1376, "step": 5479 }, { "epoch": 0.0548, "grad_norm": 0.7568179897684124, "learning_rate": 0.003, "loss": 4.1371, "step": 5480 }, { "epoch": 0.05481, "grad_norm": 0.842291746689268, "learning_rate": 0.003, "loss": 4.1302, "step": 5481 }, { "epoch": 0.05482, "grad_norm": 0.8335768637729064, "learning_rate": 0.003, "loss": 4.1229, "step": 5482 }, { "epoch": 0.05483, "grad_norm": 0.8356448898440997, "learning_rate": 0.003, "loss": 4.1231, "step": 5483 }, { "epoch": 0.05484, "grad_norm": 0.8347694767449836, "learning_rate": 0.003, "loss": 4.1485, "step": 5484 }, { "epoch": 0.05485, "grad_norm": 0.7596611463998084, "learning_rate": 0.003, "loss": 4.15, "step": 5485 }, { "epoch": 0.05486, "grad_norm": 0.7203868901609732, "learning_rate": 0.003, "loss": 4.1318, "step": 5486 }, { "epoch": 0.05487, "grad_norm": 0.7045034193470036, "learning_rate": 0.003, "loss": 4.1376, "step": 5487 }, { "epoch": 0.05488, "grad_norm": 0.7134164831679168, "learning_rate": 0.003, "loss": 4.1332, "step": 5488 }, { "epoch": 0.05489, "grad_norm": 0.7566204654677959, "learning_rate": 0.003, "loss": 4.1554, "step": 5489 }, { "epoch": 0.0549, "grad_norm": 0.753089787028579, "learning_rate": 0.003, "loss": 4.1274, "step": 5490 }, { "epoch": 0.05491, "grad_norm": 0.7900961174130867, "learning_rate": 0.003, "loss": 4.0994, "step": 5491 }, { "epoch": 0.05492, "grad_norm": 0.7735481675330634, "learning_rate": 0.003, "loss": 4.1455, "step": 5492 }, { "epoch": 0.05493, "grad_norm": 0.7921324153077896, "learning_rate": 0.003, "loss": 4.1113, "step": 5493 }, { "epoch": 0.05494, "grad_norm": 0.7783756160879682, "learning_rate": 0.003, "loss": 4.1404, "step": 5494 }, { "epoch": 0.05495, "grad_norm": 0.8354402801680301, "learning_rate": 0.003, "loss": 4.1288, "step": 5495 }, { "epoch": 0.05496, "grad_norm": 0.6922018070920165, "learning_rate": 0.003, "loss": 4.1145, "step": 5496 }, { "epoch": 0.05497, "grad_norm": 0.6446395374277146, "learning_rate": 0.003, "loss": 4.1365, "step": 5497 }, { "epoch": 0.05498, "grad_norm": 0.6080103999530705, "learning_rate": 0.003, "loss": 4.1147, "step": 5498 }, { "epoch": 0.05499, "grad_norm": 0.6519375925877654, "learning_rate": 0.003, "loss": 4.1023, "step": 5499 }, { "epoch": 0.055, "grad_norm": 0.5807988789895889, "learning_rate": 0.003, "loss": 4.1058, "step": 5500 }, { "epoch": 0.05501, "grad_norm": 0.5252348957768659, "learning_rate": 0.003, "loss": 4.1129, "step": 5501 }, { "epoch": 0.05502, "grad_norm": 0.6442067499919607, "learning_rate": 0.003, "loss": 4.1035, "step": 5502 }, { "epoch": 0.05503, "grad_norm": 0.6540019754179668, "learning_rate": 0.003, "loss": 4.1065, "step": 5503 }, { "epoch": 0.05504, "grad_norm": 0.6146856306540659, "learning_rate": 0.003, "loss": 4.1579, "step": 5504 }, { "epoch": 0.05505, "grad_norm": 0.6120251915961363, "learning_rate": 0.003, "loss": 4.1105, "step": 5505 }, { "epoch": 0.05506, "grad_norm": 0.7454876837271709, "learning_rate": 0.003, "loss": 4.0868, "step": 5506 }, { "epoch": 0.05507, "grad_norm": 0.7761307027748946, "learning_rate": 0.003, "loss": 4.1446, "step": 5507 }, { "epoch": 0.05508, "grad_norm": 0.7800855127913302, "learning_rate": 0.003, "loss": 4.1244, "step": 5508 }, { "epoch": 0.05509, "grad_norm": 0.9996587539628538, "learning_rate": 0.003, "loss": 4.126, "step": 5509 }, { "epoch": 0.0551, "grad_norm": 1.2203215437155306, "learning_rate": 0.003, "loss": 4.1171, "step": 5510 }, { "epoch": 0.05511, "grad_norm": 0.8793583971610104, "learning_rate": 0.003, "loss": 4.1355, "step": 5511 }, { "epoch": 0.05512, "grad_norm": 0.7370740348757334, "learning_rate": 0.003, "loss": 4.1383, "step": 5512 }, { "epoch": 0.05513, "grad_norm": 0.6510573891135065, "learning_rate": 0.003, "loss": 4.1211, "step": 5513 }, { "epoch": 0.05514, "grad_norm": 0.7249273397693745, "learning_rate": 0.003, "loss": 4.1304, "step": 5514 }, { "epoch": 0.05515, "grad_norm": 0.7885147514411773, "learning_rate": 0.003, "loss": 4.1145, "step": 5515 }, { "epoch": 0.05516, "grad_norm": 0.9148149024980577, "learning_rate": 0.003, "loss": 4.1517, "step": 5516 }, { "epoch": 0.05517, "grad_norm": 0.9500608738728219, "learning_rate": 0.003, "loss": 4.1135, "step": 5517 }, { "epoch": 0.05518, "grad_norm": 0.8711962258266909, "learning_rate": 0.003, "loss": 4.1339, "step": 5518 }, { "epoch": 0.05519, "grad_norm": 0.8912312013555587, "learning_rate": 0.003, "loss": 4.1629, "step": 5519 }, { "epoch": 0.0552, "grad_norm": 0.7356449479297673, "learning_rate": 0.003, "loss": 4.1323, "step": 5520 }, { "epoch": 0.05521, "grad_norm": 0.652079639982586, "learning_rate": 0.003, "loss": 4.1438, "step": 5521 }, { "epoch": 0.05522, "grad_norm": 0.7227034463026758, "learning_rate": 0.003, "loss": 4.1587, "step": 5522 }, { "epoch": 0.05523, "grad_norm": 0.6646962930116027, "learning_rate": 0.003, "loss": 4.1304, "step": 5523 }, { "epoch": 0.05524, "grad_norm": 0.6345664894540247, "learning_rate": 0.003, "loss": 4.1448, "step": 5524 }, { "epoch": 0.05525, "grad_norm": 0.7628485020641987, "learning_rate": 0.003, "loss": 4.1119, "step": 5525 }, { "epoch": 0.05526, "grad_norm": 0.9521465562602499, "learning_rate": 0.003, "loss": 4.1243, "step": 5526 }, { "epoch": 0.05527, "grad_norm": 0.9654484523561925, "learning_rate": 0.003, "loss": 4.0874, "step": 5527 }, { "epoch": 0.05528, "grad_norm": 0.9467279300817267, "learning_rate": 0.003, "loss": 4.137, "step": 5528 }, { "epoch": 0.05529, "grad_norm": 0.8441582406490136, "learning_rate": 0.003, "loss": 4.1446, "step": 5529 }, { "epoch": 0.0553, "grad_norm": 0.8409264258057348, "learning_rate": 0.003, "loss": 4.1574, "step": 5530 }, { "epoch": 0.05531, "grad_norm": 0.898368141131664, "learning_rate": 0.003, "loss": 4.1485, "step": 5531 }, { "epoch": 0.05532, "grad_norm": 0.9193518146488663, "learning_rate": 0.003, "loss": 4.1331, "step": 5532 }, { "epoch": 0.05533, "grad_norm": 0.9804221843926488, "learning_rate": 0.003, "loss": 4.1541, "step": 5533 }, { "epoch": 0.05534, "grad_norm": 1.0187659844050676, "learning_rate": 0.003, "loss": 4.1449, "step": 5534 }, { "epoch": 0.05535, "grad_norm": 0.9901354181855488, "learning_rate": 0.003, "loss": 4.1562, "step": 5535 }, { "epoch": 0.05536, "grad_norm": 0.8825476878279837, "learning_rate": 0.003, "loss": 4.1468, "step": 5536 }, { "epoch": 0.05537, "grad_norm": 0.7556391963162472, "learning_rate": 0.003, "loss": 4.1375, "step": 5537 }, { "epoch": 0.05538, "grad_norm": 0.6817528820545807, "learning_rate": 0.003, "loss": 4.1332, "step": 5538 }, { "epoch": 0.05539, "grad_norm": 0.7027437428476654, "learning_rate": 0.003, "loss": 4.1186, "step": 5539 }, { "epoch": 0.0554, "grad_norm": 0.7674376622806265, "learning_rate": 0.003, "loss": 4.1747, "step": 5540 }, { "epoch": 0.05541, "grad_norm": 0.9105163029193267, "learning_rate": 0.003, "loss": 4.1584, "step": 5541 }, { "epoch": 0.05542, "grad_norm": 0.8086875293524934, "learning_rate": 0.003, "loss": 4.1253, "step": 5542 }, { "epoch": 0.05543, "grad_norm": 0.7420293385687333, "learning_rate": 0.003, "loss": 4.1174, "step": 5543 }, { "epoch": 0.05544, "grad_norm": 0.8559082347379338, "learning_rate": 0.003, "loss": 4.1594, "step": 5544 }, { "epoch": 0.05545, "grad_norm": 0.8152608790796877, "learning_rate": 0.003, "loss": 4.1494, "step": 5545 }, { "epoch": 0.05546, "grad_norm": 0.6798845886566196, "learning_rate": 0.003, "loss": 4.1289, "step": 5546 }, { "epoch": 0.05547, "grad_norm": 0.69937059223718, "learning_rate": 0.003, "loss": 4.1266, "step": 5547 }, { "epoch": 0.05548, "grad_norm": 0.6313128061744391, "learning_rate": 0.003, "loss": 4.0991, "step": 5548 }, { "epoch": 0.05549, "grad_norm": 0.6803316037182424, "learning_rate": 0.003, "loss": 4.1454, "step": 5549 }, { "epoch": 0.0555, "grad_norm": 0.8849647916705993, "learning_rate": 0.003, "loss": 4.1291, "step": 5550 }, { "epoch": 0.05551, "grad_norm": 0.9946377104603599, "learning_rate": 0.003, "loss": 4.1016, "step": 5551 }, { "epoch": 0.05552, "grad_norm": 0.9054479922928002, "learning_rate": 0.003, "loss": 4.1164, "step": 5552 }, { "epoch": 0.05553, "grad_norm": 0.8058698994488415, "learning_rate": 0.003, "loss": 4.14, "step": 5553 }, { "epoch": 0.05554, "grad_norm": 0.8113670227482532, "learning_rate": 0.003, "loss": 4.1253, "step": 5554 }, { "epoch": 0.05555, "grad_norm": 0.8623032272003974, "learning_rate": 0.003, "loss": 4.1697, "step": 5555 }, { "epoch": 0.05556, "grad_norm": 0.7815305295497466, "learning_rate": 0.003, "loss": 4.1187, "step": 5556 }, { "epoch": 0.05557, "grad_norm": 0.7613158957065711, "learning_rate": 0.003, "loss": 4.1482, "step": 5557 }, { "epoch": 0.05558, "grad_norm": 0.6848954615857884, "learning_rate": 0.003, "loss": 4.1024, "step": 5558 }, { "epoch": 0.05559, "grad_norm": 0.6558580775338294, "learning_rate": 0.003, "loss": 4.1263, "step": 5559 }, { "epoch": 0.0556, "grad_norm": 0.6949517459401227, "learning_rate": 0.003, "loss": 4.1522, "step": 5560 }, { "epoch": 0.05561, "grad_norm": 0.6020633152546068, "learning_rate": 0.003, "loss": 4.1023, "step": 5561 }, { "epoch": 0.05562, "grad_norm": 0.6130293848011331, "learning_rate": 0.003, "loss": 4.1158, "step": 5562 }, { "epoch": 0.05563, "grad_norm": 0.5305748367682133, "learning_rate": 0.003, "loss": 4.1211, "step": 5563 }, { "epoch": 0.05564, "grad_norm": 0.5360201658815928, "learning_rate": 0.003, "loss": 4.1054, "step": 5564 }, { "epoch": 0.05565, "grad_norm": 0.5782685165038048, "learning_rate": 0.003, "loss": 4.1461, "step": 5565 }, { "epoch": 0.05566, "grad_norm": 0.7133895920716543, "learning_rate": 0.003, "loss": 4.0986, "step": 5566 }, { "epoch": 0.05567, "grad_norm": 0.826072535494595, "learning_rate": 0.003, "loss": 4.1109, "step": 5567 }, { "epoch": 0.05568, "grad_norm": 0.9865934604781055, "learning_rate": 0.003, "loss": 4.1465, "step": 5568 }, { "epoch": 0.05569, "grad_norm": 1.1577665175633736, "learning_rate": 0.003, "loss": 4.1238, "step": 5569 }, { "epoch": 0.0557, "grad_norm": 0.836822155212909, "learning_rate": 0.003, "loss": 4.1424, "step": 5570 }, { "epoch": 0.05571, "grad_norm": 0.8107309429680243, "learning_rate": 0.003, "loss": 4.1347, "step": 5571 }, { "epoch": 0.05572, "grad_norm": 0.7471424734568298, "learning_rate": 0.003, "loss": 4.1077, "step": 5572 }, { "epoch": 0.05573, "grad_norm": 0.834234761336203, "learning_rate": 0.003, "loss": 4.1533, "step": 5573 }, { "epoch": 0.05574, "grad_norm": 0.9390828719578128, "learning_rate": 0.003, "loss": 4.1247, "step": 5574 }, { "epoch": 0.05575, "grad_norm": 1.0260723102103353, "learning_rate": 0.003, "loss": 4.1331, "step": 5575 }, { "epoch": 0.05576, "grad_norm": 0.8611281671804792, "learning_rate": 0.003, "loss": 4.1032, "step": 5576 }, { "epoch": 0.05577, "grad_norm": 0.7889811692592865, "learning_rate": 0.003, "loss": 4.1267, "step": 5577 }, { "epoch": 0.05578, "grad_norm": 0.8753881201720198, "learning_rate": 0.003, "loss": 4.1268, "step": 5578 }, { "epoch": 0.05579, "grad_norm": 0.8075261839099657, "learning_rate": 0.003, "loss": 4.1474, "step": 5579 }, { "epoch": 0.0558, "grad_norm": 0.6451347078049557, "learning_rate": 0.003, "loss": 4.1276, "step": 5580 }, { "epoch": 0.05581, "grad_norm": 0.5565812999963974, "learning_rate": 0.003, "loss": 4.1399, "step": 5581 }, { "epoch": 0.05582, "grad_norm": 0.5875801341192183, "learning_rate": 0.003, "loss": 4.1519, "step": 5582 }, { "epoch": 0.05583, "grad_norm": 0.6172149180481271, "learning_rate": 0.003, "loss": 4.1257, "step": 5583 }, { "epoch": 0.05584, "grad_norm": 0.6243564462276252, "learning_rate": 0.003, "loss": 4.1224, "step": 5584 }, { "epoch": 0.05585, "grad_norm": 0.7526662295082331, "learning_rate": 0.003, "loss": 4.0942, "step": 5585 }, { "epoch": 0.05586, "grad_norm": 0.926843716234651, "learning_rate": 0.003, "loss": 4.1419, "step": 5586 }, { "epoch": 0.05587, "grad_norm": 1.0068785999700622, "learning_rate": 0.003, "loss": 4.1489, "step": 5587 }, { "epoch": 0.05588, "grad_norm": 0.9183554908331668, "learning_rate": 0.003, "loss": 4.1226, "step": 5588 }, { "epoch": 0.05589, "grad_norm": 0.8399340851508159, "learning_rate": 0.003, "loss": 4.1356, "step": 5589 }, { "epoch": 0.0559, "grad_norm": 0.702522081897405, "learning_rate": 0.003, "loss": 4.126, "step": 5590 }, { "epoch": 0.05591, "grad_norm": 0.8554841678862133, "learning_rate": 0.003, "loss": 4.1384, "step": 5591 }, { "epoch": 0.05592, "grad_norm": 1.0796189206376294, "learning_rate": 0.003, "loss": 4.1463, "step": 5592 }, { "epoch": 0.05593, "grad_norm": 0.8728376363454068, "learning_rate": 0.003, "loss": 4.1311, "step": 5593 }, { "epoch": 0.05594, "grad_norm": 0.6937202641640116, "learning_rate": 0.003, "loss": 4.1183, "step": 5594 }, { "epoch": 0.05595, "grad_norm": 0.66356619963666, "learning_rate": 0.003, "loss": 4.098, "step": 5595 }, { "epoch": 0.05596, "grad_norm": 0.7927953508675363, "learning_rate": 0.003, "loss": 4.112, "step": 5596 }, { "epoch": 0.05597, "grad_norm": 1.0914599310465052, "learning_rate": 0.003, "loss": 4.1531, "step": 5597 }, { "epoch": 0.05598, "grad_norm": 0.979078781931696, "learning_rate": 0.003, "loss": 4.1089, "step": 5598 }, { "epoch": 0.05599, "grad_norm": 0.8737639680425687, "learning_rate": 0.003, "loss": 4.1574, "step": 5599 }, { "epoch": 0.056, "grad_norm": 0.9488588659190235, "learning_rate": 0.003, "loss": 4.1035, "step": 5600 }, { "epoch": 0.05601, "grad_norm": 0.9787141618869671, "learning_rate": 0.003, "loss": 4.1211, "step": 5601 }, { "epoch": 0.05602, "grad_norm": 1.1091138120552808, "learning_rate": 0.003, "loss": 4.1233, "step": 5602 }, { "epoch": 0.05603, "grad_norm": 0.8587130498163756, "learning_rate": 0.003, "loss": 4.1264, "step": 5603 }, { "epoch": 0.05604, "grad_norm": 0.7477443911515989, "learning_rate": 0.003, "loss": 4.1576, "step": 5604 }, { "epoch": 0.05605, "grad_norm": 0.6578770652147883, "learning_rate": 0.003, "loss": 4.0874, "step": 5605 }, { "epoch": 0.05606, "grad_norm": 0.6529008726545039, "learning_rate": 0.003, "loss": 4.1393, "step": 5606 }, { "epoch": 0.05607, "grad_norm": 0.6971218018514623, "learning_rate": 0.003, "loss": 4.1562, "step": 5607 }, { "epoch": 0.05608, "grad_norm": 0.8592836275322725, "learning_rate": 0.003, "loss": 4.0834, "step": 5608 }, { "epoch": 0.05609, "grad_norm": 1.0589082640672076, "learning_rate": 0.003, "loss": 4.135, "step": 5609 }, { "epoch": 0.0561, "grad_norm": 0.8647971161448784, "learning_rate": 0.003, "loss": 4.1651, "step": 5610 }, { "epoch": 0.05611, "grad_norm": 0.7323461767341091, "learning_rate": 0.003, "loss": 4.1315, "step": 5611 }, { "epoch": 0.05612, "grad_norm": 0.7346877413349945, "learning_rate": 0.003, "loss": 4.1228, "step": 5612 }, { "epoch": 0.05613, "grad_norm": 0.7046408743706015, "learning_rate": 0.003, "loss": 4.1484, "step": 5613 }, { "epoch": 0.05614, "grad_norm": 0.7374788617059296, "learning_rate": 0.003, "loss": 4.1303, "step": 5614 }, { "epoch": 0.05615, "grad_norm": 0.8226983375167506, "learning_rate": 0.003, "loss": 4.0993, "step": 5615 }, { "epoch": 0.05616, "grad_norm": 0.8679780386223853, "learning_rate": 0.003, "loss": 4.117, "step": 5616 }, { "epoch": 0.05617, "grad_norm": 0.9315265116837809, "learning_rate": 0.003, "loss": 4.1448, "step": 5617 }, { "epoch": 0.05618, "grad_norm": 0.7604395735189772, "learning_rate": 0.003, "loss": 4.1175, "step": 5618 }, { "epoch": 0.05619, "grad_norm": 0.7485651636552902, "learning_rate": 0.003, "loss": 4.177, "step": 5619 }, { "epoch": 0.0562, "grad_norm": 0.7181098485002947, "learning_rate": 0.003, "loss": 4.1281, "step": 5620 }, { "epoch": 0.05621, "grad_norm": 0.6644004914528993, "learning_rate": 0.003, "loss": 4.1353, "step": 5621 }, { "epoch": 0.05622, "grad_norm": 0.6212809614577611, "learning_rate": 0.003, "loss": 4.1164, "step": 5622 }, { "epoch": 0.05623, "grad_norm": 0.6362873457411731, "learning_rate": 0.003, "loss": 4.1385, "step": 5623 }, { "epoch": 0.05624, "grad_norm": 0.6095376423594582, "learning_rate": 0.003, "loss": 4.1528, "step": 5624 }, { "epoch": 0.05625, "grad_norm": 0.6012390884980424, "learning_rate": 0.003, "loss": 4.1461, "step": 5625 }, { "epoch": 0.05626, "grad_norm": 0.6373515773309039, "learning_rate": 0.003, "loss": 4.1126, "step": 5626 }, { "epoch": 0.05627, "grad_norm": 0.6997251309707232, "learning_rate": 0.003, "loss": 4.1249, "step": 5627 }, { "epoch": 0.05628, "grad_norm": 0.7740087467448155, "learning_rate": 0.003, "loss": 4.1205, "step": 5628 }, { "epoch": 0.05629, "grad_norm": 0.7635869528942756, "learning_rate": 0.003, "loss": 4.1309, "step": 5629 }, { "epoch": 0.0563, "grad_norm": 0.8230920756703966, "learning_rate": 0.003, "loss": 4.1105, "step": 5630 }, { "epoch": 0.05631, "grad_norm": 1.0656938602588153, "learning_rate": 0.003, "loss": 4.105, "step": 5631 }, { "epoch": 0.05632, "grad_norm": 1.0708267403591085, "learning_rate": 0.003, "loss": 4.1391, "step": 5632 }, { "epoch": 0.05633, "grad_norm": 0.789050996667616, "learning_rate": 0.003, "loss": 4.1317, "step": 5633 }, { "epoch": 0.05634, "grad_norm": 0.703640516716815, "learning_rate": 0.003, "loss": 4.1114, "step": 5634 }, { "epoch": 0.05635, "grad_norm": 0.8170971379255785, "learning_rate": 0.003, "loss": 4.1341, "step": 5635 }, { "epoch": 0.05636, "grad_norm": 0.7643132742033529, "learning_rate": 0.003, "loss": 4.1264, "step": 5636 }, { "epoch": 0.05637, "grad_norm": 0.7269418593201121, "learning_rate": 0.003, "loss": 4.1621, "step": 5637 }, { "epoch": 0.05638, "grad_norm": 0.6425160594147294, "learning_rate": 0.003, "loss": 4.1113, "step": 5638 }, { "epoch": 0.05639, "grad_norm": 0.59424152487318, "learning_rate": 0.003, "loss": 4.1173, "step": 5639 }, { "epoch": 0.0564, "grad_norm": 0.6307390916938302, "learning_rate": 0.003, "loss": 4.1285, "step": 5640 }, { "epoch": 0.05641, "grad_norm": 0.6398464382688712, "learning_rate": 0.003, "loss": 4.0785, "step": 5641 }, { "epoch": 0.05642, "grad_norm": 0.5607522531731973, "learning_rate": 0.003, "loss": 4.1426, "step": 5642 }, { "epoch": 0.05643, "grad_norm": 0.5848917477606379, "learning_rate": 0.003, "loss": 4.1019, "step": 5643 }, { "epoch": 0.05644, "grad_norm": 0.6555662778545387, "learning_rate": 0.003, "loss": 4.1172, "step": 5644 }, { "epoch": 0.05645, "grad_norm": 0.6055581079132784, "learning_rate": 0.003, "loss": 4.1071, "step": 5645 }, { "epoch": 0.05646, "grad_norm": 0.5727498420416256, "learning_rate": 0.003, "loss": 4.1154, "step": 5646 }, { "epoch": 0.05647, "grad_norm": 0.5143080586691178, "learning_rate": 0.003, "loss": 4.1149, "step": 5647 }, { "epoch": 0.05648, "grad_norm": 0.6091773925484103, "learning_rate": 0.003, "loss": 4.13, "step": 5648 }, { "epoch": 0.05649, "grad_norm": 0.7510364666789875, "learning_rate": 0.003, "loss": 4.1253, "step": 5649 }, { "epoch": 0.0565, "grad_norm": 0.8427546995305852, "learning_rate": 0.003, "loss": 4.1444, "step": 5650 }, { "epoch": 0.05651, "grad_norm": 0.9454344139315918, "learning_rate": 0.003, "loss": 4.1095, "step": 5651 }, { "epoch": 0.05652, "grad_norm": 1.0512411398620864, "learning_rate": 0.003, "loss": 4.1114, "step": 5652 }, { "epoch": 0.05653, "grad_norm": 1.0651426178923196, "learning_rate": 0.003, "loss": 4.1443, "step": 5653 }, { "epoch": 0.05654, "grad_norm": 0.8316486999901311, "learning_rate": 0.003, "loss": 4.1458, "step": 5654 }, { "epoch": 0.05655, "grad_norm": 0.7989690345947827, "learning_rate": 0.003, "loss": 4.1368, "step": 5655 }, { "epoch": 0.05656, "grad_norm": 0.7220583330129677, "learning_rate": 0.003, "loss": 4.1322, "step": 5656 }, { "epoch": 0.05657, "grad_norm": 0.6534481422931128, "learning_rate": 0.003, "loss": 4.13, "step": 5657 }, { "epoch": 0.05658, "grad_norm": 0.5840017265991315, "learning_rate": 0.003, "loss": 4.1165, "step": 5658 }, { "epoch": 0.05659, "grad_norm": 0.6445129008912936, "learning_rate": 0.003, "loss": 4.1191, "step": 5659 }, { "epoch": 0.0566, "grad_norm": 0.6350047659342725, "learning_rate": 0.003, "loss": 4.1249, "step": 5660 }, { "epoch": 0.05661, "grad_norm": 0.75443344313924, "learning_rate": 0.003, "loss": 4.1615, "step": 5661 }, { "epoch": 0.05662, "grad_norm": 0.8347625040490713, "learning_rate": 0.003, "loss": 4.1044, "step": 5662 }, { "epoch": 0.05663, "grad_norm": 0.7844268329201943, "learning_rate": 0.003, "loss": 4.1254, "step": 5663 }, { "epoch": 0.05664, "grad_norm": 0.6882757704839232, "learning_rate": 0.003, "loss": 4.1018, "step": 5664 }, { "epoch": 0.05665, "grad_norm": 0.7143954784573894, "learning_rate": 0.003, "loss": 4.1122, "step": 5665 }, { "epoch": 0.05666, "grad_norm": 0.7697196574672253, "learning_rate": 0.003, "loss": 4.1278, "step": 5666 }, { "epoch": 0.05667, "grad_norm": 0.6930833704435834, "learning_rate": 0.003, "loss": 4.0974, "step": 5667 }, { "epoch": 0.05668, "grad_norm": 0.7588668210696813, "learning_rate": 0.003, "loss": 4.1428, "step": 5668 }, { "epoch": 0.05669, "grad_norm": 0.7785704484779421, "learning_rate": 0.003, "loss": 4.0928, "step": 5669 }, { "epoch": 0.0567, "grad_norm": 0.7557718596536394, "learning_rate": 0.003, "loss": 4.1333, "step": 5670 }, { "epoch": 0.05671, "grad_norm": 0.7694361102868486, "learning_rate": 0.003, "loss": 4.1168, "step": 5671 }, { "epoch": 0.05672, "grad_norm": 0.8007083587054245, "learning_rate": 0.003, "loss": 4.1211, "step": 5672 }, { "epoch": 0.05673, "grad_norm": 0.8580257925305742, "learning_rate": 0.003, "loss": 4.0999, "step": 5673 }, { "epoch": 0.05674, "grad_norm": 0.9196710283055469, "learning_rate": 0.003, "loss": 4.1366, "step": 5674 }, { "epoch": 0.05675, "grad_norm": 0.9492581195457488, "learning_rate": 0.003, "loss": 4.129, "step": 5675 }, { "epoch": 0.05676, "grad_norm": 1.0914070056598988, "learning_rate": 0.003, "loss": 4.146, "step": 5676 }, { "epoch": 0.05677, "grad_norm": 0.8885367967761989, "learning_rate": 0.003, "loss": 4.1347, "step": 5677 }, { "epoch": 0.05678, "grad_norm": 0.7905086473346482, "learning_rate": 0.003, "loss": 4.1278, "step": 5678 }, { "epoch": 0.05679, "grad_norm": 0.6956868554948951, "learning_rate": 0.003, "loss": 4.14, "step": 5679 }, { "epoch": 0.0568, "grad_norm": 0.7085015911445326, "learning_rate": 0.003, "loss": 4.117, "step": 5680 }, { "epoch": 0.05681, "grad_norm": 0.7982290524952671, "learning_rate": 0.003, "loss": 4.1228, "step": 5681 }, { "epoch": 0.05682, "grad_norm": 0.8702967583238443, "learning_rate": 0.003, "loss": 4.138, "step": 5682 }, { "epoch": 0.05683, "grad_norm": 0.8957753899417163, "learning_rate": 0.003, "loss": 4.0995, "step": 5683 }, { "epoch": 0.05684, "grad_norm": 0.7828031111834381, "learning_rate": 0.003, "loss": 4.1079, "step": 5684 }, { "epoch": 0.05685, "grad_norm": 0.7982392827676595, "learning_rate": 0.003, "loss": 4.1506, "step": 5685 }, { "epoch": 0.05686, "grad_norm": 0.7549245911981988, "learning_rate": 0.003, "loss": 4.1508, "step": 5686 }, { "epoch": 0.05687, "grad_norm": 0.7845211375597457, "learning_rate": 0.003, "loss": 4.103, "step": 5687 }, { "epoch": 0.05688, "grad_norm": 0.8652123527501867, "learning_rate": 0.003, "loss": 4.1191, "step": 5688 }, { "epoch": 0.05689, "grad_norm": 1.1480876620771052, "learning_rate": 0.003, "loss": 4.1102, "step": 5689 }, { "epoch": 0.0569, "grad_norm": 1.0790768924826935, "learning_rate": 0.003, "loss": 4.1365, "step": 5690 }, { "epoch": 0.05691, "grad_norm": 1.015989841166836, "learning_rate": 0.003, "loss": 4.1641, "step": 5691 }, { "epoch": 0.05692, "grad_norm": 0.8594661410624701, "learning_rate": 0.003, "loss": 4.1349, "step": 5692 }, { "epoch": 0.05693, "grad_norm": 0.6762044853377914, "learning_rate": 0.003, "loss": 4.1319, "step": 5693 }, { "epoch": 0.05694, "grad_norm": 0.7821423521203583, "learning_rate": 0.003, "loss": 4.1115, "step": 5694 }, { "epoch": 0.05695, "grad_norm": 0.8134766301370081, "learning_rate": 0.003, "loss": 4.1135, "step": 5695 }, { "epoch": 0.05696, "grad_norm": 0.8476984030864564, "learning_rate": 0.003, "loss": 4.1189, "step": 5696 }, { "epoch": 0.05697, "grad_norm": 0.8589446601517351, "learning_rate": 0.003, "loss": 4.1562, "step": 5697 }, { "epoch": 0.05698, "grad_norm": 0.7927970537237816, "learning_rate": 0.003, "loss": 4.1535, "step": 5698 }, { "epoch": 0.05699, "grad_norm": 0.6663534787165875, "learning_rate": 0.003, "loss": 4.1331, "step": 5699 }, { "epoch": 0.057, "grad_norm": 0.6962826442411336, "learning_rate": 0.003, "loss": 4.1161, "step": 5700 }, { "epoch": 0.05701, "grad_norm": 0.748056579704057, "learning_rate": 0.003, "loss": 4.1391, "step": 5701 }, { "epoch": 0.05702, "grad_norm": 0.7276987471205317, "learning_rate": 0.003, "loss": 4.119, "step": 5702 }, { "epoch": 0.05703, "grad_norm": 0.7077636085898174, "learning_rate": 0.003, "loss": 4.159, "step": 5703 }, { "epoch": 0.05704, "grad_norm": 0.9037559825169571, "learning_rate": 0.003, "loss": 4.1266, "step": 5704 }, { "epoch": 0.05705, "grad_norm": 0.8075509768599368, "learning_rate": 0.003, "loss": 4.124, "step": 5705 }, { "epoch": 0.05706, "grad_norm": 0.7674921460937386, "learning_rate": 0.003, "loss": 4.1165, "step": 5706 }, { "epoch": 0.05707, "grad_norm": 0.7539599082513933, "learning_rate": 0.003, "loss": 4.1188, "step": 5707 }, { "epoch": 0.05708, "grad_norm": 0.6799305657607112, "learning_rate": 0.003, "loss": 4.1537, "step": 5708 }, { "epoch": 0.05709, "grad_norm": 0.5479624493880318, "learning_rate": 0.003, "loss": 4.1207, "step": 5709 }, { "epoch": 0.0571, "grad_norm": 0.5121373459902853, "learning_rate": 0.003, "loss": 4.1015, "step": 5710 }, { "epoch": 0.05711, "grad_norm": 0.7280903597835207, "learning_rate": 0.003, "loss": 4.1343, "step": 5711 }, { "epoch": 0.05712, "grad_norm": 1.0090249543371728, "learning_rate": 0.003, "loss": 4.1159, "step": 5712 }, { "epoch": 0.05713, "grad_norm": 1.1666935879947706, "learning_rate": 0.003, "loss": 4.1256, "step": 5713 }, { "epoch": 0.05714, "grad_norm": 0.6574654065182656, "learning_rate": 0.003, "loss": 4.1657, "step": 5714 }, { "epoch": 0.05715, "grad_norm": 0.6534581937051788, "learning_rate": 0.003, "loss": 4.1688, "step": 5715 }, { "epoch": 0.05716, "grad_norm": 0.7637170840325201, "learning_rate": 0.003, "loss": 4.1401, "step": 5716 }, { "epoch": 0.05717, "grad_norm": 0.80055490534206, "learning_rate": 0.003, "loss": 4.1548, "step": 5717 }, { "epoch": 0.05718, "grad_norm": 0.8922767275182982, "learning_rate": 0.003, "loss": 4.1398, "step": 5718 }, { "epoch": 0.05719, "grad_norm": 0.8933371654984332, "learning_rate": 0.003, "loss": 4.142, "step": 5719 }, { "epoch": 0.0572, "grad_norm": 0.8966300715229761, "learning_rate": 0.003, "loss": 4.1342, "step": 5720 }, { "epoch": 0.05721, "grad_norm": 0.8505332145208457, "learning_rate": 0.003, "loss": 4.1181, "step": 5721 }, { "epoch": 0.05722, "grad_norm": 0.7349175001912129, "learning_rate": 0.003, "loss": 4.1237, "step": 5722 }, { "epoch": 0.05723, "grad_norm": 0.723220376175918, "learning_rate": 0.003, "loss": 4.1257, "step": 5723 }, { "epoch": 0.05724, "grad_norm": 0.680563858030686, "learning_rate": 0.003, "loss": 4.1224, "step": 5724 }, { "epoch": 0.05725, "grad_norm": 0.8088590173981038, "learning_rate": 0.003, "loss": 4.1389, "step": 5725 }, { "epoch": 0.05726, "grad_norm": 0.9418455002457423, "learning_rate": 0.003, "loss": 4.1317, "step": 5726 }, { "epoch": 0.05727, "grad_norm": 1.0373122053831039, "learning_rate": 0.003, "loss": 4.1217, "step": 5727 }, { "epoch": 0.05728, "grad_norm": 0.9012220097719827, "learning_rate": 0.003, "loss": 4.1502, "step": 5728 }, { "epoch": 0.05729, "grad_norm": 0.9082669562608017, "learning_rate": 0.003, "loss": 4.1295, "step": 5729 }, { "epoch": 0.0573, "grad_norm": 0.943356157606432, "learning_rate": 0.003, "loss": 4.104, "step": 5730 }, { "epoch": 0.05731, "grad_norm": 0.8723864537449718, "learning_rate": 0.003, "loss": 4.1203, "step": 5731 }, { "epoch": 0.05732, "grad_norm": 0.8570219760726053, "learning_rate": 0.003, "loss": 4.1478, "step": 5732 }, { "epoch": 0.05733, "grad_norm": 0.8298818226350395, "learning_rate": 0.003, "loss": 4.1567, "step": 5733 }, { "epoch": 0.05734, "grad_norm": 0.8547854470967996, "learning_rate": 0.003, "loss": 4.1537, "step": 5734 }, { "epoch": 0.05735, "grad_norm": 0.8145784886674767, "learning_rate": 0.003, "loss": 4.1161, "step": 5735 }, { "epoch": 0.05736, "grad_norm": 0.6881823470117722, "learning_rate": 0.003, "loss": 4.139, "step": 5736 }, { "epoch": 0.05737, "grad_norm": 0.5844418060230477, "learning_rate": 0.003, "loss": 4.1273, "step": 5737 }, { "epoch": 0.05738, "grad_norm": 0.5430304440202582, "learning_rate": 0.003, "loss": 4.0985, "step": 5738 }, { "epoch": 0.05739, "grad_norm": 0.5591474039370316, "learning_rate": 0.003, "loss": 4.1055, "step": 5739 }, { "epoch": 0.0574, "grad_norm": 0.5762447209115041, "learning_rate": 0.003, "loss": 4.1108, "step": 5740 }, { "epoch": 0.05741, "grad_norm": 0.5659495615021296, "learning_rate": 0.003, "loss": 4.0992, "step": 5741 }, { "epoch": 0.05742, "grad_norm": 0.5868859562225888, "learning_rate": 0.003, "loss": 4.1159, "step": 5742 }, { "epoch": 0.05743, "grad_norm": 0.6325935486380904, "learning_rate": 0.003, "loss": 4.1088, "step": 5743 }, { "epoch": 0.05744, "grad_norm": 0.7198789457341803, "learning_rate": 0.003, "loss": 4.121, "step": 5744 }, { "epoch": 0.05745, "grad_norm": 0.8372703754668815, "learning_rate": 0.003, "loss": 4.1213, "step": 5745 }, { "epoch": 0.05746, "grad_norm": 1.013456290369712, "learning_rate": 0.003, "loss": 4.1139, "step": 5746 }, { "epoch": 0.05747, "grad_norm": 0.9926529931921089, "learning_rate": 0.003, "loss": 4.1411, "step": 5747 }, { "epoch": 0.05748, "grad_norm": 0.8673968423379839, "learning_rate": 0.003, "loss": 4.1592, "step": 5748 }, { "epoch": 0.05749, "grad_norm": 0.8265215218017055, "learning_rate": 0.003, "loss": 4.1121, "step": 5749 }, { "epoch": 0.0575, "grad_norm": 0.9141071050046009, "learning_rate": 0.003, "loss": 4.1328, "step": 5750 }, { "epoch": 0.05751, "grad_norm": 0.8867858497305796, "learning_rate": 0.003, "loss": 4.1531, "step": 5751 }, { "epoch": 0.05752, "grad_norm": 0.8160084253804927, "learning_rate": 0.003, "loss": 4.1369, "step": 5752 }, { "epoch": 0.05753, "grad_norm": 0.7570642462201534, "learning_rate": 0.003, "loss": 4.1294, "step": 5753 }, { "epoch": 0.05754, "grad_norm": 0.6931105461575188, "learning_rate": 0.003, "loss": 4.1256, "step": 5754 }, { "epoch": 0.05755, "grad_norm": 0.7892100308149763, "learning_rate": 0.003, "loss": 4.1134, "step": 5755 }, { "epoch": 0.05756, "grad_norm": 0.7797288548736725, "learning_rate": 0.003, "loss": 4.1459, "step": 5756 }, { "epoch": 0.05757, "grad_norm": 0.9143238759273115, "learning_rate": 0.003, "loss": 4.134, "step": 5757 }, { "epoch": 0.05758, "grad_norm": 0.9568176868550978, "learning_rate": 0.003, "loss": 4.1324, "step": 5758 }, { "epoch": 0.05759, "grad_norm": 0.9850995699177724, "learning_rate": 0.003, "loss": 4.1335, "step": 5759 }, { "epoch": 0.0576, "grad_norm": 0.9114825113358783, "learning_rate": 0.003, "loss": 4.1321, "step": 5760 }, { "epoch": 0.05761, "grad_norm": 0.8393162691101879, "learning_rate": 0.003, "loss": 4.1244, "step": 5761 }, { "epoch": 0.05762, "grad_norm": 0.8185722596967431, "learning_rate": 0.003, "loss": 4.1103, "step": 5762 }, { "epoch": 0.05763, "grad_norm": 0.7941154884714461, "learning_rate": 0.003, "loss": 4.1287, "step": 5763 }, { "epoch": 0.05764, "grad_norm": 0.8167052006322443, "learning_rate": 0.003, "loss": 4.128, "step": 5764 }, { "epoch": 0.05765, "grad_norm": 0.9075689951343894, "learning_rate": 0.003, "loss": 4.1415, "step": 5765 }, { "epoch": 0.05766, "grad_norm": 0.9379058663927884, "learning_rate": 0.003, "loss": 4.1443, "step": 5766 }, { "epoch": 0.05767, "grad_norm": 0.980509823532266, "learning_rate": 0.003, "loss": 4.1339, "step": 5767 }, { "epoch": 0.05768, "grad_norm": 1.0177076074132854, "learning_rate": 0.003, "loss": 4.1549, "step": 5768 }, { "epoch": 0.05769, "grad_norm": 1.0429502276363345, "learning_rate": 0.003, "loss": 4.139, "step": 5769 }, { "epoch": 0.0577, "grad_norm": 1.1100087019496394, "learning_rate": 0.003, "loss": 4.1491, "step": 5770 }, { "epoch": 0.05771, "grad_norm": 0.7223949464608548, "learning_rate": 0.003, "loss": 4.1454, "step": 5771 }, { "epoch": 0.05772, "grad_norm": 0.5382829044206766, "learning_rate": 0.003, "loss": 4.1464, "step": 5772 }, { "epoch": 0.05773, "grad_norm": 0.6406979044556045, "learning_rate": 0.003, "loss": 4.1415, "step": 5773 }, { "epoch": 0.05774, "grad_norm": 0.8097734695290948, "learning_rate": 0.003, "loss": 4.1152, "step": 5774 }, { "epoch": 0.05775, "grad_norm": 1.0604370012894522, "learning_rate": 0.003, "loss": 4.1297, "step": 5775 }, { "epoch": 0.05776, "grad_norm": 1.0011226290122666, "learning_rate": 0.003, "loss": 4.1261, "step": 5776 }, { "epoch": 0.05777, "grad_norm": 0.799664737208272, "learning_rate": 0.003, "loss": 4.1371, "step": 5777 }, { "epoch": 0.05778, "grad_norm": 0.7400439760795068, "learning_rate": 0.003, "loss": 4.1222, "step": 5778 }, { "epoch": 0.05779, "grad_norm": 0.8139200513919773, "learning_rate": 0.003, "loss": 4.1449, "step": 5779 }, { "epoch": 0.0578, "grad_norm": 0.7887623553765343, "learning_rate": 0.003, "loss": 4.1033, "step": 5780 }, { "epoch": 0.05781, "grad_norm": 0.6516840422961846, "learning_rate": 0.003, "loss": 4.1126, "step": 5781 }, { "epoch": 0.05782, "grad_norm": 0.6059160139010061, "learning_rate": 0.003, "loss": 4.1428, "step": 5782 }, { "epoch": 0.05783, "grad_norm": 0.6322968665722302, "learning_rate": 0.003, "loss": 4.0845, "step": 5783 }, { "epoch": 0.05784, "grad_norm": 0.6350140210950438, "learning_rate": 0.003, "loss": 4.1248, "step": 5784 }, { "epoch": 0.05785, "grad_norm": 0.6503751753400261, "learning_rate": 0.003, "loss": 4.1374, "step": 5785 }, { "epoch": 0.05786, "grad_norm": 0.6778764586815985, "learning_rate": 0.003, "loss": 4.1245, "step": 5786 }, { "epoch": 0.05787, "grad_norm": 0.5988251572879624, "learning_rate": 0.003, "loss": 4.093, "step": 5787 }, { "epoch": 0.05788, "grad_norm": 0.4761288844619093, "learning_rate": 0.003, "loss": 4.1109, "step": 5788 }, { "epoch": 0.05789, "grad_norm": 0.4810545422328699, "learning_rate": 0.003, "loss": 4.1101, "step": 5789 }, { "epoch": 0.0579, "grad_norm": 0.48719290090643896, "learning_rate": 0.003, "loss": 4.1274, "step": 5790 }, { "epoch": 0.05791, "grad_norm": 0.6649633480208823, "learning_rate": 0.003, "loss": 4.1083, "step": 5791 }, { "epoch": 0.05792, "grad_norm": 0.836880563895898, "learning_rate": 0.003, "loss": 4.1126, "step": 5792 }, { "epoch": 0.05793, "grad_norm": 0.8850525856438148, "learning_rate": 0.003, "loss": 4.1047, "step": 5793 }, { "epoch": 0.05794, "grad_norm": 0.7567725522310005, "learning_rate": 0.003, "loss": 4.118, "step": 5794 }, { "epoch": 0.05795, "grad_norm": 0.6015273097107093, "learning_rate": 0.003, "loss": 4.1475, "step": 5795 }, { "epoch": 0.05796, "grad_norm": 0.6630617490292271, "learning_rate": 0.003, "loss": 4.1181, "step": 5796 }, { "epoch": 0.05797, "grad_norm": 0.6842122117299039, "learning_rate": 0.003, "loss": 4.1188, "step": 5797 }, { "epoch": 0.05798, "grad_norm": 0.6729171219216635, "learning_rate": 0.003, "loss": 4.0983, "step": 5798 }, { "epoch": 0.05799, "grad_norm": 0.6050205840034845, "learning_rate": 0.003, "loss": 4.1294, "step": 5799 }, { "epoch": 0.058, "grad_norm": 0.5873051359813098, "learning_rate": 0.003, "loss": 4.1282, "step": 5800 }, { "epoch": 0.05801, "grad_norm": 0.6839087744451585, "learning_rate": 0.003, "loss": 4.1102, "step": 5801 }, { "epoch": 0.05802, "grad_norm": 0.7630433000288757, "learning_rate": 0.003, "loss": 4.1185, "step": 5802 }, { "epoch": 0.05803, "grad_norm": 0.8857270500243197, "learning_rate": 0.003, "loss": 4.0863, "step": 5803 }, { "epoch": 0.05804, "grad_norm": 0.8401507038678849, "learning_rate": 0.003, "loss": 4.0831, "step": 5804 }, { "epoch": 0.05805, "grad_norm": 0.7475439434893316, "learning_rate": 0.003, "loss": 4.0891, "step": 5805 }, { "epoch": 0.05806, "grad_norm": 0.7978687405953363, "learning_rate": 0.003, "loss": 4.128, "step": 5806 }, { "epoch": 0.05807, "grad_norm": 0.8733737077265966, "learning_rate": 0.003, "loss": 4.1388, "step": 5807 }, { "epoch": 0.05808, "grad_norm": 0.7983979140429381, "learning_rate": 0.003, "loss": 4.1475, "step": 5808 }, { "epoch": 0.05809, "grad_norm": 0.8908751163199425, "learning_rate": 0.003, "loss": 4.1541, "step": 5809 }, { "epoch": 0.0581, "grad_norm": 0.9552572694017188, "learning_rate": 0.003, "loss": 4.1022, "step": 5810 }, { "epoch": 0.05811, "grad_norm": 0.9248914071476577, "learning_rate": 0.003, "loss": 4.1204, "step": 5811 }, { "epoch": 0.05812, "grad_norm": 0.9817321870382631, "learning_rate": 0.003, "loss": 4.1247, "step": 5812 }, { "epoch": 0.05813, "grad_norm": 1.0001250001988298, "learning_rate": 0.003, "loss": 4.1355, "step": 5813 }, { "epoch": 0.05814, "grad_norm": 0.8827566805540904, "learning_rate": 0.003, "loss": 4.1153, "step": 5814 }, { "epoch": 0.05815, "grad_norm": 0.8029521049860747, "learning_rate": 0.003, "loss": 4.094, "step": 5815 }, { "epoch": 0.05816, "grad_norm": 0.70939056436584, "learning_rate": 0.003, "loss": 4.1343, "step": 5816 }, { "epoch": 0.05817, "grad_norm": 0.726146469102038, "learning_rate": 0.003, "loss": 4.1461, "step": 5817 }, { "epoch": 0.05818, "grad_norm": 0.6869846356437459, "learning_rate": 0.003, "loss": 4.1257, "step": 5818 }, { "epoch": 0.05819, "grad_norm": 0.7498344281131131, "learning_rate": 0.003, "loss": 4.1189, "step": 5819 }, { "epoch": 0.0582, "grad_norm": 0.7991793787271796, "learning_rate": 0.003, "loss": 4.1253, "step": 5820 }, { "epoch": 0.05821, "grad_norm": 0.8692851524335367, "learning_rate": 0.003, "loss": 4.1427, "step": 5821 }, { "epoch": 0.05822, "grad_norm": 0.9485327101586195, "learning_rate": 0.003, "loss": 4.1088, "step": 5822 }, { "epoch": 0.05823, "grad_norm": 0.8570830392648408, "learning_rate": 0.003, "loss": 4.1397, "step": 5823 }, { "epoch": 0.05824, "grad_norm": 0.8874388461019113, "learning_rate": 0.003, "loss": 4.1426, "step": 5824 }, { "epoch": 0.05825, "grad_norm": 0.7062702547735038, "learning_rate": 0.003, "loss": 4.1608, "step": 5825 }, { "epoch": 0.05826, "grad_norm": 0.6393477795637886, "learning_rate": 0.003, "loss": 4.1078, "step": 5826 }, { "epoch": 0.05827, "grad_norm": 0.8019064704761822, "learning_rate": 0.003, "loss": 4.1371, "step": 5827 }, { "epoch": 0.05828, "grad_norm": 0.935978602016839, "learning_rate": 0.003, "loss": 4.0949, "step": 5828 }, { "epoch": 0.05829, "grad_norm": 1.0118867950089439, "learning_rate": 0.003, "loss": 4.1439, "step": 5829 }, { "epoch": 0.0583, "grad_norm": 0.9012310576760173, "learning_rate": 0.003, "loss": 4.0895, "step": 5830 }, { "epoch": 0.05831, "grad_norm": 0.7921513486784892, "learning_rate": 0.003, "loss": 4.1011, "step": 5831 }, { "epoch": 0.05832, "grad_norm": 0.8500687381471804, "learning_rate": 0.003, "loss": 4.148, "step": 5832 }, { "epoch": 0.05833, "grad_norm": 0.8901324912360562, "learning_rate": 0.003, "loss": 4.1358, "step": 5833 }, { "epoch": 0.05834, "grad_norm": 0.9308154213152057, "learning_rate": 0.003, "loss": 4.1442, "step": 5834 }, { "epoch": 0.05835, "grad_norm": 0.7729299081011445, "learning_rate": 0.003, "loss": 4.1607, "step": 5835 }, { "epoch": 0.05836, "grad_norm": 0.6500047045753582, "learning_rate": 0.003, "loss": 4.1399, "step": 5836 }, { "epoch": 0.05837, "grad_norm": 0.6365900961231694, "learning_rate": 0.003, "loss": 4.135, "step": 5837 }, { "epoch": 0.05838, "grad_norm": 0.6835199033150741, "learning_rate": 0.003, "loss": 4.1369, "step": 5838 }, { "epoch": 0.05839, "grad_norm": 0.6793630131258913, "learning_rate": 0.003, "loss": 4.1364, "step": 5839 }, { "epoch": 0.0584, "grad_norm": 0.6942313892219114, "learning_rate": 0.003, "loss": 4.1543, "step": 5840 }, { "epoch": 0.05841, "grad_norm": 0.8257321604395392, "learning_rate": 0.003, "loss": 4.1101, "step": 5841 }, { "epoch": 0.05842, "grad_norm": 0.9932148908544386, "learning_rate": 0.003, "loss": 4.1315, "step": 5842 }, { "epoch": 0.05843, "grad_norm": 1.060226393564342, "learning_rate": 0.003, "loss": 4.1376, "step": 5843 }, { "epoch": 0.05844, "grad_norm": 0.7244723778858251, "learning_rate": 0.003, "loss": 4.1253, "step": 5844 }, { "epoch": 0.05845, "grad_norm": 0.5661816774133244, "learning_rate": 0.003, "loss": 4.1206, "step": 5845 }, { "epoch": 0.05846, "grad_norm": 0.6964991790304752, "learning_rate": 0.003, "loss": 4.1155, "step": 5846 }, { "epoch": 0.05847, "grad_norm": 0.8137963768049039, "learning_rate": 0.003, "loss": 4.1448, "step": 5847 }, { "epoch": 0.05848, "grad_norm": 0.9321439032747159, "learning_rate": 0.003, "loss": 4.1239, "step": 5848 }, { "epoch": 0.05849, "grad_norm": 0.9690612770794925, "learning_rate": 0.003, "loss": 4.092, "step": 5849 }, { "epoch": 0.0585, "grad_norm": 0.8070900144801142, "learning_rate": 0.003, "loss": 4.1203, "step": 5850 }, { "epoch": 0.05851, "grad_norm": 0.7978154280770444, "learning_rate": 0.003, "loss": 4.1098, "step": 5851 }, { "epoch": 0.05852, "grad_norm": 0.9108907816600593, "learning_rate": 0.003, "loss": 4.1228, "step": 5852 }, { "epoch": 0.05853, "grad_norm": 0.8478337317220017, "learning_rate": 0.003, "loss": 4.1098, "step": 5853 }, { "epoch": 0.05854, "grad_norm": 0.8702591663009297, "learning_rate": 0.003, "loss": 4.1814, "step": 5854 }, { "epoch": 0.05855, "grad_norm": 0.7888882543910517, "learning_rate": 0.003, "loss": 4.1356, "step": 5855 }, { "epoch": 0.05856, "grad_norm": 0.7469242966369579, "learning_rate": 0.003, "loss": 4.1279, "step": 5856 }, { "epoch": 0.05857, "grad_norm": 0.706624864851467, "learning_rate": 0.003, "loss": 4.1372, "step": 5857 }, { "epoch": 0.05858, "grad_norm": 0.7270168320884999, "learning_rate": 0.003, "loss": 4.1077, "step": 5858 }, { "epoch": 0.05859, "grad_norm": 0.7529935867004306, "learning_rate": 0.003, "loss": 4.1324, "step": 5859 }, { "epoch": 0.0586, "grad_norm": 0.6444427467245577, "learning_rate": 0.003, "loss": 4.1347, "step": 5860 }, { "epoch": 0.05861, "grad_norm": 0.5606393662770783, "learning_rate": 0.003, "loss": 4.147, "step": 5861 }, { "epoch": 0.05862, "grad_norm": 0.5657377156785917, "learning_rate": 0.003, "loss": 4.1369, "step": 5862 }, { "epoch": 0.05863, "grad_norm": 0.5760395754129868, "learning_rate": 0.003, "loss": 4.1341, "step": 5863 }, { "epoch": 0.05864, "grad_norm": 0.6345646643457323, "learning_rate": 0.003, "loss": 4.1103, "step": 5864 }, { "epoch": 0.05865, "grad_norm": 0.6894044067934254, "learning_rate": 0.003, "loss": 4.1062, "step": 5865 }, { "epoch": 0.05866, "grad_norm": 0.783353091605477, "learning_rate": 0.003, "loss": 4.1302, "step": 5866 }, { "epoch": 0.05867, "grad_norm": 0.8917647666324704, "learning_rate": 0.003, "loss": 4.1147, "step": 5867 }, { "epoch": 0.05868, "grad_norm": 1.0201499997700771, "learning_rate": 0.003, "loss": 4.1059, "step": 5868 }, { "epoch": 0.05869, "grad_norm": 0.9506659421163607, "learning_rate": 0.003, "loss": 4.1166, "step": 5869 }, { "epoch": 0.0587, "grad_norm": 0.7664369973041759, "learning_rate": 0.003, "loss": 4.1105, "step": 5870 }, { "epoch": 0.05871, "grad_norm": 0.7087081834503488, "learning_rate": 0.003, "loss": 4.1128, "step": 5871 }, { "epoch": 0.05872, "grad_norm": 0.6953730352638628, "learning_rate": 0.003, "loss": 4.1093, "step": 5872 }, { "epoch": 0.05873, "grad_norm": 0.671656467983226, "learning_rate": 0.003, "loss": 4.1224, "step": 5873 }, { "epoch": 0.05874, "grad_norm": 0.6955207948522178, "learning_rate": 0.003, "loss": 4.1132, "step": 5874 }, { "epoch": 0.05875, "grad_norm": 0.6268815574500972, "learning_rate": 0.003, "loss": 4.1228, "step": 5875 }, { "epoch": 0.05876, "grad_norm": 0.6389310541639931, "learning_rate": 0.003, "loss": 4.1187, "step": 5876 }, { "epoch": 0.05877, "grad_norm": 0.6217227109072572, "learning_rate": 0.003, "loss": 4.1241, "step": 5877 }, { "epoch": 0.05878, "grad_norm": 0.7692902630082031, "learning_rate": 0.003, "loss": 4.1385, "step": 5878 }, { "epoch": 0.05879, "grad_norm": 0.7440526157383079, "learning_rate": 0.003, "loss": 4.1519, "step": 5879 }, { "epoch": 0.0588, "grad_norm": 0.8420901226461514, "learning_rate": 0.003, "loss": 4.1187, "step": 5880 }, { "epoch": 0.05881, "grad_norm": 0.781449431655522, "learning_rate": 0.003, "loss": 4.1208, "step": 5881 }, { "epoch": 0.05882, "grad_norm": 0.7627663058529272, "learning_rate": 0.003, "loss": 4.136, "step": 5882 }, { "epoch": 0.05883, "grad_norm": 0.8235508005883959, "learning_rate": 0.003, "loss": 4.1376, "step": 5883 }, { "epoch": 0.05884, "grad_norm": 0.9282232426526248, "learning_rate": 0.003, "loss": 4.1275, "step": 5884 }, { "epoch": 0.05885, "grad_norm": 0.8846373141454603, "learning_rate": 0.003, "loss": 4.1403, "step": 5885 }, { "epoch": 0.05886, "grad_norm": 1.1084709963523778, "learning_rate": 0.003, "loss": 4.1454, "step": 5886 }, { "epoch": 0.05887, "grad_norm": 0.9597419334563931, "learning_rate": 0.003, "loss": 4.1302, "step": 5887 }, { "epoch": 0.05888, "grad_norm": 1.1484270558519247, "learning_rate": 0.003, "loss": 4.0923, "step": 5888 }, { "epoch": 0.05889, "grad_norm": 0.7934646401717872, "learning_rate": 0.003, "loss": 4.1205, "step": 5889 }, { "epoch": 0.0589, "grad_norm": 0.7136730779704626, "learning_rate": 0.003, "loss": 4.1228, "step": 5890 }, { "epoch": 0.05891, "grad_norm": 0.725739807105299, "learning_rate": 0.003, "loss": 4.1214, "step": 5891 }, { "epoch": 0.05892, "grad_norm": 0.7256800823290586, "learning_rate": 0.003, "loss": 4.1515, "step": 5892 }, { "epoch": 0.05893, "grad_norm": 0.7692017239614891, "learning_rate": 0.003, "loss": 4.1265, "step": 5893 }, { "epoch": 0.05894, "grad_norm": 0.7387008350548316, "learning_rate": 0.003, "loss": 4.1008, "step": 5894 }, { "epoch": 0.05895, "grad_norm": 0.6999771063166865, "learning_rate": 0.003, "loss": 4.1099, "step": 5895 }, { "epoch": 0.05896, "grad_norm": 0.6586617304026913, "learning_rate": 0.003, "loss": 4.1334, "step": 5896 }, { "epoch": 0.05897, "grad_norm": 0.7731203514227012, "learning_rate": 0.003, "loss": 4.1445, "step": 5897 }, { "epoch": 0.05898, "grad_norm": 0.8305320510139119, "learning_rate": 0.003, "loss": 4.1428, "step": 5898 }, { "epoch": 0.05899, "grad_norm": 0.7742225479585695, "learning_rate": 0.003, "loss": 4.1025, "step": 5899 }, { "epoch": 0.059, "grad_norm": 0.9048055534854499, "learning_rate": 0.003, "loss": 4.1318, "step": 5900 }, { "epoch": 0.05901, "grad_norm": 0.9868685055861205, "learning_rate": 0.003, "loss": 4.1192, "step": 5901 }, { "epoch": 0.05902, "grad_norm": 1.0603964019326821, "learning_rate": 0.003, "loss": 4.1368, "step": 5902 }, { "epoch": 0.05903, "grad_norm": 1.0745850428190593, "learning_rate": 0.003, "loss": 4.1428, "step": 5903 }, { "epoch": 0.05904, "grad_norm": 0.8629637800138508, "learning_rate": 0.003, "loss": 4.1233, "step": 5904 }, { "epoch": 0.05905, "grad_norm": 0.746934964755465, "learning_rate": 0.003, "loss": 4.1213, "step": 5905 }, { "epoch": 0.05906, "grad_norm": 0.7381867732393038, "learning_rate": 0.003, "loss": 4.1215, "step": 5906 }, { "epoch": 0.05907, "grad_norm": 0.8572362096896168, "learning_rate": 0.003, "loss": 4.1132, "step": 5907 }, { "epoch": 0.05908, "grad_norm": 1.0322237476040848, "learning_rate": 0.003, "loss": 4.1436, "step": 5908 }, { "epoch": 0.05909, "grad_norm": 0.8902648134987068, "learning_rate": 0.003, "loss": 4.1457, "step": 5909 }, { "epoch": 0.0591, "grad_norm": 0.773959915980087, "learning_rate": 0.003, "loss": 4.1168, "step": 5910 }, { "epoch": 0.05911, "grad_norm": 0.8192300954731131, "learning_rate": 0.003, "loss": 4.1067, "step": 5911 }, { "epoch": 0.05912, "grad_norm": 0.8546496512225619, "learning_rate": 0.003, "loss": 4.1508, "step": 5912 }, { "epoch": 0.05913, "grad_norm": 0.7554616555250914, "learning_rate": 0.003, "loss": 4.1702, "step": 5913 }, { "epoch": 0.05914, "grad_norm": 0.7048163682769332, "learning_rate": 0.003, "loss": 4.1399, "step": 5914 }, { "epoch": 0.05915, "grad_norm": 0.7670702397330968, "learning_rate": 0.003, "loss": 4.1385, "step": 5915 }, { "epoch": 0.05916, "grad_norm": 0.7678316112541398, "learning_rate": 0.003, "loss": 4.1246, "step": 5916 }, { "epoch": 0.05917, "grad_norm": 0.6726151262347494, "learning_rate": 0.003, "loss": 4.1064, "step": 5917 }, { "epoch": 0.05918, "grad_norm": 0.6440357536146859, "learning_rate": 0.003, "loss": 4.1188, "step": 5918 }, { "epoch": 0.05919, "grad_norm": 0.6990263177794974, "learning_rate": 0.003, "loss": 4.1158, "step": 5919 }, { "epoch": 0.0592, "grad_norm": 0.7179007290375548, "learning_rate": 0.003, "loss": 4.0966, "step": 5920 }, { "epoch": 0.05921, "grad_norm": 0.7012296689409627, "learning_rate": 0.003, "loss": 4.1342, "step": 5921 }, { "epoch": 0.05922, "grad_norm": 0.7174027660375633, "learning_rate": 0.003, "loss": 4.126, "step": 5922 }, { "epoch": 0.05923, "grad_norm": 0.7806176175934582, "learning_rate": 0.003, "loss": 4.0927, "step": 5923 }, { "epoch": 0.05924, "grad_norm": 0.8832244353079346, "learning_rate": 0.003, "loss": 4.1249, "step": 5924 }, { "epoch": 0.05925, "grad_norm": 0.8531781306643916, "learning_rate": 0.003, "loss": 4.1126, "step": 5925 }, { "epoch": 0.05926, "grad_norm": 0.6880234314584872, "learning_rate": 0.003, "loss": 4.1153, "step": 5926 }, { "epoch": 0.05927, "grad_norm": 0.5242997783745522, "learning_rate": 0.003, "loss": 4.0932, "step": 5927 }, { "epoch": 0.05928, "grad_norm": 0.7118263988410835, "learning_rate": 0.003, "loss": 4.1155, "step": 5928 }, { "epoch": 0.05929, "grad_norm": 0.9051922436752257, "learning_rate": 0.003, "loss": 4.1295, "step": 5929 }, { "epoch": 0.0593, "grad_norm": 0.9625875611083993, "learning_rate": 0.003, "loss": 4.1346, "step": 5930 }, { "epoch": 0.05931, "grad_norm": 0.8328206700541043, "learning_rate": 0.003, "loss": 4.1154, "step": 5931 }, { "epoch": 0.05932, "grad_norm": 0.6522428602922113, "learning_rate": 0.003, "loss": 4.1378, "step": 5932 }, { "epoch": 0.05933, "grad_norm": 0.8202258652975196, "learning_rate": 0.003, "loss": 4.1083, "step": 5933 }, { "epoch": 0.05934, "grad_norm": 0.945676463256145, "learning_rate": 0.003, "loss": 4.1538, "step": 5934 }, { "epoch": 0.05935, "grad_norm": 0.9491618675679084, "learning_rate": 0.003, "loss": 4.1398, "step": 5935 }, { "epoch": 0.05936, "grad_norm": 0.7346756788314402, "learning_rate": 0.003, "loss": 4.1319, "step": 5936 }, { "epoch": 0.05937, "grad_norm": 0.6849843625907307, "learning_rate": 0.003, "loss": 4.1291, "step": 5937 }, { "epoch": 0.05938, "grad_norm": 0.842493843061451, "learning_rate": 0.003, "loss": 4.1086, "step": 5938 }, { "epoch": 0.05939, "grad_norm": 0.9802131731998531, "learning_rate": 0.003, "loss": 4.1259, "step": 5939 }, { "epoch": 0.0594, "grad_norm": 1.0740955279537077, "learning_rate": 0.003, "loss": 4.1484, "step": 5940 }, { "epoch": 0.05941, "grad_norm": 0.80664432417099, "learning_rate": 0.003, "loss": 4.1417, "step": 5941 }, { "epoch": 0.05942, "grad_norm": 0.6673463401001458, "learning_rate": 0.003, "loss": 4.127, "step": 5942 }, { "epoch": 0.05943, "grad_norm": 0.7136729446036693, "learning_rate": 0.003, "loss": 4.1155, "step": 5943 }, { "epoch": 0.05944, "grad_norm": 0.7748772994265909, "learning_rate": 0.003, "loss": 4.0725, "step": 5944 }, { "epoch": 0.05945, "grad_norm": 0.8319748600017105, "learning_rate": 0.003, "loss": 4.1153, "step": 5945 }, { "epoch": 0.05946, "grad_norm": 0.7912457127582416, "learning_rate": 0.003, "loss": 4.111, "step": 5946 }, { "epoch": 0.05947, "grad_norm": 0.7794353450592082, "learning_rate": 0.003, "loss": 4.105, "step": 5947 }, { "epoch": 0.05948, "grad_norm": 0.8118650335500223, "learning_rate": 0.003, "loss": 4.1199, "step": 5948 }, { "epoch": 0.05949, "grad_norm": 0.8550107923144452, "learning_rate": 0.003, "loss": 4.1159, "step": 5949 }, { "epoch": 0.0595, "grad_norm": 0.8915593726429895, "learning_rate": 0.003, "loss": 4.1317, "step": 5950 }, { "epoch": 0.05951, "grad_norm": 0.8945628866915003, "learning_rate": 0.003, "loss": 4.127, "step": 5951 }, { "epoch": 0.05952, "grad_norm": 1.0089347086265024, "learning_rate": 0.003, "loss": 4.1428, "step": 5952 }, { "epoch": 0.05953, "grad_norm": 1.0649366317902482, "learning_rate": 0.003, "loss": 4.1271, "step": 5953 }, { "epoch": 0.05954, "grad_norm": 1.0265306010895543, "learning_rate": 0.003, "loss": 4.1486, "step": 5954 }, { "epoch": 0.05955, "grad_norm": 1.0053891091737848, "learning_rate": 0.003, "loss": 4.1313, "step": 5955 }, { "epoch": 0.05956, "grad_norm": 0.9418944733574852, "learning_rate": 0.003, "loss": 4.1144, "step": 5956 }, { "epoch": 0.05957, "grad_norm": 0.8074037334150802, "learning_rate": 0.003, "loss": 4.1176, "step": 5957 }, { "epoch": 0.05958, "grad_norm": 0.835853516513737, "learning_rate": 0.003, "loss": 4.1292, "step": 5958 }, { "epoch": 0.05959, "grad_norm": 0.9524075368140398, "learning_rate": 0.003, "loss": 4.1328, "step": 5959 }, { "epoch": 0.0596, "grad_norm": 0.9443691821790957, "learning_rate": 0.003, "loss": 4.1497, "step": 5960 }, { "epoch": 0.05961, "grad_norm": 0.8502001962303277, "learning_rate": 0.003, "loss": 4.1367, "step": 5961 }, { "epoch": 0.05962, "grad_norm": 0.7110403654732431, "learning_rate": 0.003, "loss": 4.1236, "step": 5962 }, { "epoch": 0.05963, "grad_norm": 0.6983877872125744, "learning_rate": 0.003, "loss": 4.1316, "step": 5963 }, { "epoch": 0.05964, "grad_norm": 0.6706508091828537, "learning_rate": 0.003, "loss": 4.124, "step": 5964 }, { "epoch": 0.05965, "grad_norm": 0.5516979364941672, "learning_rate": 0.003, "loss": 4.1158, "step": 5965 }, { "epoch": 0.05966, "grad_norm": 0.5599182409877457, "learning_rate": 0.003, "loss": 4.1312, "step": 5966 }, { "epoch": 0.05967, "grad_norm": 0.5140228003501242, "learning_rate": 0.003, "loss": 4.1081, "step": 5967 }, { "epoch": 0.05968, "grad_norm": 0.5043516900053563, "learning_rate": 0.003, "loss": 4.1064, "step": 5968 }, { "epoch": 0.05969, "grad_norm": 0.4781424578723309, "learning_rate": 0.003, "loss": 4.0957, "step": 5969 }, { "epoch": 0.0597, "grad_norm": 0.49499059228808473, "learning_rate": 0.003, "loss": 4.102, "step": 5970 }, { "epoch": 0.05971, "grad_norm": 0.5302688602898687, "learning_rate": 0.003, "loss": 4.1204, "step": 5971 }, { "epoch": 0.05972, "grad_norm": 0.5628921179090366, "learning_rate": 0.003, "loss": 4.1306, "step": 5972 }, { "epoch": 0.05973, "grad_norm": 0.6729538698391679, "learning_rate": 0.003, "loss": 4.0945, "step": 5973 }, { "epoch": 0.05974, "grad_norm": 0.8325991069119943, "learning_rate": 0.003, "loss": 4.0936, "step": 5974 }, { "epoch": 0.05975, "grad_norm": 1.0067237957827262, "learning_rate": 0.003, "loss": 4.1348, "step": 5975 }, { "epoch": 0.05976, "grad_norm": 1.2218569863697355, "learning_rate": 0.003, "loss": 4.1282, "step": 5976 }, { "epoch": 0.05977, "grad_norm": 0.6120282975916178, "learning_rate": 0.003, "loss": 4.1255, "step": 5977 }, { "epoch": 0.05978, "grad_norm": 0.634934658722155, "learning_rate": 0.003, "loss": 4.1114, "step": 5978 }, { "epoch": 0.05979, "grad_norm": 0.795528878435463, "learning_rate": 0.003, "loss": 4.1217, "step": 5979 }, { "epoch": 0.0598, "grad_norm": 0.7850331087825336, "learning_rate": 0.003, "loss": 4.0976, "step": 5980 }, { "epoch": 0.05981, "grad_norm": 0.7268016197563338, "learning_rate": 0.003, "loss": 4.1002, "step": 5981 }, { "epoch": 0.05982, "grad_norm": 0.6606189209405257, "learning_rate": 0.003, "loss": 4.1138, "step": 5982 }, { "epoch": 0.05983, "grad_norm": 0.7268589111503444, "learning_rate": 0.003, "loss": 4.1294, "step": 5983 }, { "epoch": 0.05984, "grad_norm": 0.7708148438016759, "learning_rate": 0.003, "loss": 4.0984, "step": 5984 }, { "epoch": 0.05985, "grad_norm": 0.7846506991954134, "learning_rate": 0.003, "loss": 4.134, "step": 5985 }, { "epoch": 0.05986, "grad_norm": 0.7546183129518631, "learning_rate": 0.003, "loss": 4.1308, "step": 5986 }, { "epoch": 0.05987, "grad_norm": 0.8496680958314422, "learning_rate": 0.003, "loss": 4.105, "step": 5987 }, { "epoch": 0.05988, "grad_norm": 0.9751701746085983, "learning_rate": 0.003, "loss": 4.121, "step": 5988 }, { "epoch": 0.05989, "grad_norm": 1.0265361397551773, "learning_rate": 0.003, "loss": 4.1218, "step": 5989 }, { "epoch": 0.0599, "grad_norm": 1.032703400806406, "learning_rate": 0.003, "loss": 4.1273, "step": 5990 }, { "epoch": 0.05991, "grad_norm": 0.9301589844396891, "learning_rate": 0.003, "loss": 4.096, "step": 5991 }, { "epoch": 0.05992, "grad_norm": 0.8529001190170848, "learning_rate": 0.003, "loss": 4.1232, "step": 5992 }, { "epoch": 0.05993, "grad_norm": 0.7232314574469758, "learning_rate": 0.003, "loss": 4.1272, "step": 5993 }, { "epoch": 0.05994, "grad_norm": 0.6055257270988292, "learning_rate": 0.003, "loss": 4.1071, "step": 5994 }, { "epoch": 0.05995, "grad_norm": 0.6461271491413968, "learning_rate": 0.003, "loss": 4.1133, "step": 5995 }, { "epoch": 0.05996, "grad_norm": 0.6721002555843104, "learning_rate": 0.003, "loss": 4.1485, "step": 5996 }, { "epoch": 0.05997, "grad_norm": 0.7240739565812643, "learning_rate": 0.003, "loss": 4.0945, "step": 5997 }, { "epoch": 0.05998, "grad_norm": 0.6950086390271106, "learning_rate": 0.003, "loss": 4.1417, "step": 5998 }, { "epoch": 0.05999, "grad_norm": 0.750185481402966, "learning_rate": 0.003, "loss": 4.1105, "step": 5999 }, { "epoch": 0.06, "grad_norm": 0.8158906083982681, "learning_rate": 0.003, "loss": 4.1242, "step": 6000 }, { "epoch": 0.06001, "grad_norm": 0.7786341857744299, "learning_rate": 0.003, "loss": 4.1079, "step": 6001 }, { "epoch": 0.06002, "grad_norm": 0.6362829691059452, "learning_rate": 0.003, "loss": 4.1, "step": 6002 }, { "epoch": 0.06003, "grad_norm": 0.6439798777608323, "learning_rate": 0.003, "loss": 4.1191, "step": 6003 }, { "epoch": 0.06004, "grad_norm": 0.7447283430547924, "learning_rate": 0.003, "loss": 4.1004, "step": 6004 }, { "epoch": 0.06005, "grad_norm": 0.8074281820106757, "learning_rate": 0.003, "loss": 4.1189, "step": 6005 }, { "epoch": 0.06006, "grad_norm": 0.8310801813091088, "learning_rate": 0.003, "loss": 4.103, "step": 6006 }, { "epoch": 0.06007, "grad_norm": 0.739959036143615, "learning_rate": 0.003, "loss": 4.1522, "step": 6007 }, { "epoch": 0.06008, "grad_norm": 0.7512598056369182, "learning_rate": 0.003, "loss": 4.1108, "step": 6008 }, { "epoch": 0.06009, "grad_norm": 0.8453751856138448, "learning_rate": 0.003, "loss": 4.1186, "step": 6009 }, { "epoch": 0.0601, "grad_norm": 0.9834836783342701, "learning_rate": 0.003, "loss": 4.1254, "step": 6010 }, { "epoch": 0.06011, "grad_norm": 1.2294686050706798, "learning_rate": 0.003, "loss": 4.1418, "step": 6011 }, { "epoch": 0.06012, "grad_norm": 0.6713324153136399, "learning_rate": 0.003, "loss": 4.1324, "step": 6012 }, { "epoch": 0.06013, "grad_norm": 0.7264483540409177, "learning_rate": 0.003, "loss": 4.1253, "step": 6013 }, { "epoch": 0.06014, "grad_norm": 0.8963286802156893, "learning_rate": 0.003, "loss": 4.1183, "step": 6014 }, { "epoch": 0.06015, "grad_norm": 1.0516872025977433, "learning_rate": 0.003, "loss": 4.137, "step": 6015 }, { "epoch": 0.06016, "grad_norm": 0.9867378972073549, "learning_rate": 0.003, "loss": 4.1417, "step": 6016 }, { "epoch": 0.06017, "grad_norm": 0.8189207241122054, "learning_rate": 0.003, "loss": 4.1409, "step": 6017 }, { "epoch": 0.06018, "grad_norm": 0.6248389222598227, "learning_rate": 0.003, "loss": 4.1515, "step": 6018 }, { "epoch": 0.06019, "grad_norm": 0.6382727015073211, "learning_rate": 0.003, "loss": 4.1185, "step": 6019 }, { "epoch": 0.0602, "grad_norm": 0.5942681644648503, "learning_rate": 0.003, "loss": 4.1121, "step": 6020 }, { "epoch": 0.06021, "grad_norm": 0.6307301681376707, "learning_rate": 0.003, "loss": 4.1322, "step": 6021 }, { "epoch": 0.06022, "grad_norm": 0.6750537233371731, "learning_rate": 0.003, "loss": 4.1374, "step": 6022 }, { "epoch": 0.06023, "grad_norm": 0.7128799339128652, "learning_rate": 0.003, "loss": 4.1231, "step": 6023 }, { "epoch": 0.06024, "grad_norm": 0.7520500789879113, "learning_rate": 0.003, "loss": 4.0988, "step": 6024 }, { "epoch": 0.06025, "grad_norm": 0.7805119674993733, "learning_rate": 0.003, "loss": 4.156, "step": 6025 }, { "epoch": 0.06026, "grad_norm": 0.7499035678190326, "learning_rate": 0.003, "loss": 4.1246, "step": 6026 }, { "epoch": 0.06027, "grad_norm": 0.7664750877261561, "learning_rate": 0.003, "loss": 4.1167, "step": 6027 }, { "epoch": 0.06028, "grad_norm": 0.7776094623285253, "learning_rate": 0.003, "loss": 4.1418, "step": 6028 }, { "epoch": 0.06029, "grad_norm": 0.8761047706417141, "learning_rate": 0.003, "loss": 4.1355, "step": 6029 }, { "epoch": 0.0603, "grad_norm": 0.9104716332204089, "learning_rate": 0.003, "loss": 4.1337, "step": 6030 }, { "epoch": 0.06031, "grad_norm": 0.8922902846451947, "learning_rate": 0.003, "loss": 4.127, "step": 6031 }, { "epoch": 0.06032, "grad_norm": 0.9770316982513426, "learning_rate": 0.003, "loss": 4.1335, "step": 6032 }, { "epoch": 0.06033, "grad_norm": 0.9457538473651317, "learning_rate": 0.003, "loss": 4.1363, "step": 6033 }, { "epoch": 0.06034, "grad_norm": 0.9217776085904755, "learning_rate": 0.003, "loss": 4.151, "step": 6034 }, { "epoch": 0.06035, "grad_norm": 0.9280484794744475, "learning_rate": 0.003, "loss": 4.1415, "step": 6035 }, { "epoch": 0.06036, "grad_norm": 1.0009116543043906, "learning_rate": 0.003, "loss": 4.0963, "step": 6036 }, { "epoch": 0.06037, "grad_norm": 1.100896351699259, "learning_rate": 0.003, "loss": 4.1413, "step": 6037 }, { "epoch": 0.06038, "grad_norm": 0.751886944643684, "learning_rate": 0.003, "loss": 4.1173, "step": 6038 }, { "epoch": 0.06039, "grad_norm": 0.6560741836996629, "learning_rate": 0.003, "loss": 4.0955, "step": 6039 }, { "epoch": 0.0604, "grad_norm": 0.6307042035546973, "learning_rate": 0.003, "loss": 4.1208, "step": 6040 }, { "epoch": 0.06041, "grad_norm": 0.6583077177213943, "learning_rate": 0.003, "loss": 4.1357, "step": 6041 }, { "epoch": 0.06042, "grad_norm": 0.5980033750895085, "learning_rate": 0.003, "loss": 4.1128, "step": 6042 }, { "epoch": 0.06043, "grad_norm": 0.6032908893943834, "learning_rate": 0.003, "loss": 4.1176, "step": 6043 }, { "epoch": 0.06044, "grad_norm": 0.6209590746215262, "learning_rate": 0.003, "loss": 4.1185, "step": 6044 }, { "epoch": 0.06045, "grad_norm": 0.6136074895659309, "learning_rate": 0.003, "loss": 4.1182, "step": 6045 }, { "epoch": 0.06046, "grad_norm": 0.6915063121187296, "learning_rate": 0.003, "loss": 4.1221, "step": 6046 }, { "epoch": 0.06047, "grad_norm": 0.7812065657084274, "learning_rate": 0.003, "loss": 4.103, "step": 6047 }, { "epoch": 0.06048, "grad_norm": 0.784591842396142, "learning_rate": 0.003, "loss": 4.1017, "step": 6048 }, { "epoch": 0.06049, "grad_norm": 0.856638395736723, "learning_rate": 0.003, "loss": 4.1, "step": 6049 }, { "epoch": 0.0605, "grad_norm": 1.1075444631976008, "learning_rate": 0.003, "loss": 4.1364, "step": 6050 }, { "epoch": 0.06051, "grad_norm": 1.0022518924058987, "learning_rate": 0.003, "loss": 4.1445, "step": 6051 }, { "epoch": 0.06052, "grad_norm": 0.9359706571597523, "learning_rate": 0.003, "loss": 4.1408, "step": 6052 }, { "epoch": 0.06053, "grad_norm": 0.8709162398571225, "learning_rate": 0.003, "loss": 4.1005, "step": 6053 }, { "epoch": 0.06054, "grad_norm": 0.9149227979278415, "learning_rate": 0.003, "loss": 4.138, "step": 6054 }, { "epoch": 0.06055, "grad_norm": 1.0346925153764615, "learning_rate": 0.003, "loss": 4.1351, "step": 6055 }, { "epoch": 0.06056, "grad_norm": 0.9282475689671914, "learning_rate": 0.003, "loss": 4.1269, "step": 6056 }, { "epoch": 0.06057, "grad_norm": 0.9497057705026418, "learning_rate": 0.003, "loss": 4.1123, "step": 6057 }, { "epoch": 0.06058, "grad_norm": 1.003018583474644, "learning_rate": 0.003, "loss": 4.1316, "step": 6058 }, { "epoch": 0.06059, "grad_norm": 0.9574516546287778, "learning_rate": 0.003, "loss": 4.1447, "step": 6059 }, { "epoch": 0.0606, "grad_norm": 0.9267456613771629, "learning_rate": 0.003, "loss": 4.1427, "step": 6060 }, { "epoch": 0.06061, "grad_norm": 1.0318438116932864, "learning_rate": 0.003, "loss": 4.1518, "step": 6061 }, { "epoch": 0.06062, "grad_norm": 1.0400522917116977, "learning_rate": 0.003, "loss": 4.1579, "step": 6062 }, { "epoch": 0.06063, "grad_norm": 1.0497641878303774, "learning_rate": 0.003, "loss": 4.1133, "step": 6063 }, { "epoch": 0.06064, "grad_norm": 1.0662498014953086, "learning_rate": 0.003, "loss": 4.1748, "step": 6064 }, { "epoch": 0.06065, "grad_norm": 0.8198498372995232, "learning_rate": 0.003, "loss": 4.1373, "step": 6065 }, { "epoch": 0.06066, "grad_norm": 0.7109049478395263, "learning_rate": 0.003, "loss": 4.166, "step": 6066 }, { "epoch": 0.06067, "grad_norm": 0.6804945226918797, "learning_rate": 0.003, "loss": 4.1385, "step": 6067 }, { "epoch": 0.06068, "grad_norm": 0.6372476248357833, "learning_rate": 0.003, "loss": 4.1206, "step": 6068 }, { "epoch": 0.06069, "grad_norm": 0.5605214633243844, "learning_rate": 0.003, "loss": 4.1259, "step": 6069 }, { "epoch": 0.0607, "grad_norm": 0.5452568213138966, "learning_rate": 0.003, "loss": 4.1273, "step": 6070 }, { "epoch": 0.06071, "grad_norm": 0.6180224930197478, "learning_rate": 0.003, "loss": 4.1005, "step": 6071 }, { "epoch": 0.06072, "grad_norm": 0.7813523705166946, "learning_rate": 0.003, "loss": 4.1091, "step": 6072 }, { "epoch": 0.06073, "grad_norm": 0.9736890155608084, "learning_rate": 0.003, "loss": 4.1425, "step": 6073 }, { "epoch": 0.06074, "grad_norm": 1.0327347386785892, "learning_rate": 0.003, "loss": 4.1275, "step": 6074 }, { "epoch": 0.06075, "grad_norm": 0.7472307019465172, "learning_rate": 0.003, "loss": 4.1415, "step": 6075 }, { "epoch": 0.06076, "grad_norm": 0.6670271053470518, "learning_rate": 0.003, "loss": 4.1251, "step": 6076 }, { "epoch": 0.06077, "grad_norm": 0.7254836202543244, "learning_rate": 0.003, "loss": 4.1225, "step": 6077 }, { "epoch": 0.06078, "grad_norm": 0.6208465711650167, "learning_rate": 0.003, "loss": 4.1262, "step": 6078 }, { "epoch": 0.06079, "grad_norm": 0.7113936555649242, "learning_rate": 0.003, "loss": 4.1217, "step": 6079 }, { "epoch": 0.0608, "grad_norm": 0.689258626457068, "learning_rate": 0.003, "loss": 4.119, "step": 6080 }, { "epoch": 0.06081, "grad_norm": 0.6551455177926783, "learning_rate": 0.003, "loss": 4.0983, "step": 6081 }, { "epoch": 0.06082, "grad_norm": 0.6542633333810457, "learning_rate": 0.003, "loss": 4.1454, "step": 6082 }, { "epoch": 0.06083, "grad_norm": 0.6010838532362384, "learning_rate": 0.003, "loss": 4.1273, "step": 6083 }, { "epoch": 0.06084, "grad_norm": 0.5910563656180051, "learning_rate": 0.003, "loss": 4.0919, "step": 6084 }, { "epoch": 0.06085, "grad_norm": 0.6028680880478657, "learning_rate": 0.003, "loss": 4.0771, "step": 6085 }, { "epoch": 0.06086, "grad_norm": 0.5204888285801396, "learning_rate": 0.003, "loss": 4.0813, "step": 6086 }, { "epoch": 0.06087, "grad_norm": 0.5900462834410196, "learning_rate": 0.003, "loss": 4.1224, "step": 6087 }, { "epoch": 0.06088, "grad_norm": 0.6959450877948858, "learning_rate": 0.003, "loss": 4.1019, "step": 6088 }, { "epoch": 0.06089, "grad_norm": 0.6784516411313329, "learning_rate": 0.003, "loss": 4.1012, "step": 6089 }, { "epoch": 0.0609, "grad_norm": 0.7035758936099877, "learning_rate": 0.003, "loss": 4.14, "step": 6090 }, { "epoch": 0.06091, "grad_norm": 0.8330650414758795, "learning_rate": 0.003, "loss": 4.1164, "step": 6091 }, { "epoch": 0.06092, "grad_norm": 0.8930098103439321, "learning_rate": 0.003, "loss": 4.0994, "step": 6092 }, { "epoch": 0.06093, "grad_norm": 1.0224308602932068, "learning_rate": 0.003, "loss": 4.115, "step": 6093 }, { "epoch": 0.06094, "grad_norm": 0.9894448173843531, "learning_rate": 0.003, "loss": 4.1193, "step": 6094 }, { "epoch": 0.06095, "grad_norm": 0.8810812235713495, "learning_rate": 0.003, "loss": 4.1225, "step": 6095 }, { "epoch": 0.06096, "grad_norm": 0.6354430317367544, "learning_rate": 0.003, "loss": 4.1062, "step": 6096 }, { "epoch": 0.06097, "grad_norm": 0.6376701446414055, "learning_rate": 0.003, "loss": 4.1213, "step": 6097 }, { "epoch": 0.06098, "grad_norm": 0.8448719370273449, "learning_rate": 0.003, "loss": 4.0962, "step": 6098 }, { "epoch": 0.06099, "grad_norm": 0.969987418580439, "learning_rate": 0.003, "loss": 4.1229, "step": 6099 }, { "epoch": 0.061, "grad_norm": 1.0255182095149356, "learning_rate": 0.003, "loss": 4.1198, "step": 6100 }, { "epoch": 0.06101, "grad_norm": 0.8139563380818391, "learning_rate": 0.003, "loss": 4.1465, "step": 6101 }, { "epoch": 0.06102, "grad_norm": 0.6862499005656783, "learning_rate": 0.003, "loss": 4.1248, "step": 6102 }, { "epoch": 0.06103, "grad_norm": 0.7284494857382159, "learning_rate": 0.003, "loss": 4.1373, "step": 6103 }, { "epoch": 0.06104, "grad_norm": 0.7385823683842133, "learning_rate": 0.003, "loss": 4.1067, "step": 6104 }, { "epoch": 0.06105, "grad_norm": 0.7289519050865993, "learning_rate": 0.003, "loss": 4.115, "step": 6105 }, { "epoch": 0.06106, "grad_norm": 0.7418004159768921, "learning_rate": 0.003, "loss": 4.1343, "step": 6106 }, { "epoch": 0.06107, "grad_norm": 0.683000547263818, "learning_rate": 0.003, "loss": 4.1453, "step": 6107 }, { "epoch": 0.06108, "grad_norm": 0.5960849690912845, "learning_rate": 0.003, "loss": 4.1079, "step": 6108 }, { "epoch": 0.06109, "grad_norm": 0.6202810402417694, "learning_rate": 0.003, "loss": 4.0857, "step": 6109 }, { "epoch": 0.0611, "grad_norm": 0.7084749148334659, "learning_rate": 0.003, "loss": 4.1041, "step": 6110 }, { "epoch": 0.06111, "grad_norm": 0.8416354572669955, "learning_rate": 0.003, "loss": 4.1221, "step": 6111 }, { "epoch": 0.06112, "grad_norm": 1.1477396706080005, "learning_rate": 0.003, "loss": 4.1278, "step": 6112 }, { "epoch": 0.06113, "grad_norm": 0.9298557660649742, "learning_rate": 0.003, "loss": 4.1337, "step": 6113 }, { "epoch": 0.06114, "grad_norm": 0.8426576582561913, "learning_rate": 0.003, "loss": 4.095, "step": 6114 }, { "epoch": 0.06115, "grad_norm": 0.8345003178868361, "learning_rate": 0.003, "loss": 4.0997, "step": 6115 }, { "epoch": 0.06116, "grad_norm": 0.8891402025914743, "learning_rate": 0.003, "loss": 4.1225, "step": 6116 }, { "epoch": 0.06117, "grad_norm": 0.8733942465346064, "learning_rate": 0.003, "loss": 4.1167, "step": 6117 }, { "epoch": 0.06118, "grad_norm": 0.8300392838566886, "learning_rate": 0.003, "loss": 4.0911, "step": 6118 }, { "epoch": 0.06119, "grad_norm": 1.0718155087870083, "learning_rate": 0.003, "loss": 4.0895, "step": 6119 }, { "epoch": 0.0612, "grad_norm": 1.082038424508775, "learning_rate": 0.003, "loss": 4.1363, "step": 6120 }, { "epoch": 0.06121, "grad_norm": 1.0039524800797741, "learning_rate": 0.003, "loss": 4.12, "step": 6121 }, { "epoch": 0.06122, "grad_norm": 0.858861812903392, "learning_rate": 0.003, "loss": 4.1343, "step": 6122 }, { "epoch": 0.06123, "grad_norm": 0.7517337557672539, "learning_rate": 0.003, "loss": 4.131, "step": 6123 }, { "epoch": 0.06124, "grad_norm": 0.8054070621318578, "learning_rate": 0.003, "loss": 4.1251, "step": 6124 }, { "epoch": 0.06125, "grad_norm": 0.8798723198485504, "learning_rate": 0.003, "loss": 4.0915, "step": 6125 }, { "epoch": 0.06126, "grad_norm": 0.9746332514781364, "learning_rate": 0.003, "loss": 4.1116, "step": 6126 }, { "epoch": 0.06127, "grad_norm": 1.0519385433383126, "learning_rate": 0.003, "loss": 4.1099, "step": 6127 }, { "epoch": 0.06128, "grad_norm": 0.7862904704912201, "learning_rate": 0.003, "loss": 4.1178, "step": 6128 }, { "epoch": 0.06129, "grad_norm": 0.8108064027519418, "learning_rate": 0.003, "loss": 4.1301, "step": 6129 }, { "epoch": 0.0613, "grad_norm": 0.7803383051940936, "learning_rate": 0.003, "loss": 4.1383, "step": 6130 }, { "epoch": 0.06131, "grad_norm": 0.807939857772501, "learning_rate": 0.003, "loss": 4.136, "step": 6131 }, { "epoch": 0.06132, "grad_norm": 0.9270834066209759, "learning_rate": 0.003, "loss": 4.1373, "step": 6132 }, { "epoch": 0.06133, "grad_norm": 1.057590287106868, "learning_rate": 0.003, "loss": 4.1308, "step": 6133 }, { "epoch": 0.06134, "grad_norm": 0.9315718021788794, "learning_rate": 0.003, "loss": 4.1219, "step": 6134 }, { "epoch": 0.06135, "grad_norm": 0.7682639425500988, "learning_rate": 0.003, "loss": 4.1459, "step": 6135 }, { "epoch": 0.06136, "grad_norm": 0.6453509034899395, "learning_rate": 0.003, "loss": 4.1199, "step": 6136 }, { "epoch": 0.06137, "grad_norm": 0.5089285133051348, "learning_rate": 0.003, "loss": 4.1142, "step": 6137 }, { "epoch": 0.06138, "grad_norm": 0.5921446681322661, "learning_rate": 0.003, "loss": 4.1093, "step": 6138 }, { "epoch": 0.06139, "grad_norm": 0.6572428733185919, "learning_rate": 0.003, "loss": 4.1294, "step": 6139 }, { "epoch": 0.0614, "grad_norm": 0.7594320601513859, "learning_rate": 0.003, "loss": 4.0882, "step": 6140 }, { "epoch": 0.06141, "grad_norm": 0.7701960965013912, "learning_rate": 0.003, "loss": 4.0918, "step": 6141 }, { "epoch": 0.06142, "grad_norm": 0.708324006436222, "learning_rate": 0.003, "loss": 4.1139, "step": 6142 }, { "epoch": 0.06143, "grad_norm": 0.8072574094920275, "learning_rate": 0.003, "loss": 4.1253, "step": 6143 }, { "epoch": 0.06144, "grad_norm": 0.9407988342872765, "learning_rate": 0.003, "loss": 4.108, "step": 6144 }, { "epoch": 0.06145, "grad_norm": 0.9922781126952929, "learning_rate": 0.003, "loss": 4.1304, "step": 6145 }, { "epoch": 0.06146, "grad_norm": 0.9137503629646905, "learning_rate": 0.003, "loss": 4.0974, "step": 6146 }, { "epoch": 0.06147, "grad_norm": 0.8208946959727162, "learning_rate": 0.003, "loss": 4.1489, "step": 6147 }, { "epoch": 0.06148, "grad_norm": 0.7535313704885915, "learning_rate": 0.003, "loss": 4.1189, "step": 6148 }, { "epoch": 0.06149, "grad_norm": 0.7754246480784573, "learning_rate": 0.003, "loss": 4.1117, "step": 6149 }, { "epoch": 0.0615, "grad_norm": 0.8086681823805969, "learning_rate": 0.003, "loss": 4.1301, "step": 6150 }, { "epoch": 0.06151, "grad_norm": 0.7520491914732798, "learning_rate": 0.003, "loss": 4.1233, "step": 6151 }, { "epoch": 0.06152, "grad_norm": 0.7385892490900827, "learning_rate": 0.003, "loss": 4.1083, "step": 6152 }, { "epoch": 0.06153, "grad_norm": 0.585306853249163, "learning_rate": 0.003, "loss": 4.1309, "step": 6153 }, { "epoch": 0.06154, "grad_norm": 0.6075200706981868, "learning_rate": 0.003, "loss": 4.0769, "step": 6154 }, { "epoch": 0.06155, "grad_norm": 0.7008926353906986, "learning_rate": 0.003, "loss": 4.123, "step": 6155 }, { "epoch": 0.06156, "grad_norm": 0.7377701370473413, "learning_rate": 0.003, "loss": 4.1097, "step": 6156 }, { "epoch": 0.06157, "grad_norm": 0.6372062483340378, "learning_rate": 0.003, "loss": 4.1258, "step": 6157 }, { "epoch": 0.06158, "grad_norm": 0.5711395334899737, "learning_rate": 0.003, "loss": 4.147, "step": 6158 }, { "epoch": 0.06159, "grad_norm": 0.6974553713335565, "learning_rate": 0.003, "loss": 4.106, "step": 6159 }, { "epoch": 0.0616, "grad_norm": 0.7914523454155186, "learning_rate": 0.003, "loss": 4.1007, "step": 6160 }, { "epoch": 0.06161, "grad_norm": 0.9147836100156929, "learning_rate": 0.003, "loss": 4.1323, "step": 6161 }, { "epoch": 0.06162, "grad_norm": 1.0202822458279153, "learning_rate": 0.003, "loss": 4.112, "step": 6162 }, { "epoch": 0.06163, "grad_norm": 0.8492609261522512, "learning_rate": 0.003, "loss": 4.149, "step": 6163 }, { "epoch": 0.06164, "grad_norm": 0.6305588102799871, "learning_rate": 0.003, "loss": 4.1197, "step": 6164 }, { "epoch": 0.06165, "grad_norm": 0.70222905462584, "learning_rate": 0.003, "loss": 4.1037, "step": 6165 }, { "epoch": 0.06166, "grad_norm": 0.7330174697865238, "learning_rate": 0.003, "loss": 4.1323, "step": 6166 }, { "epoch": 0.06167, "grad_norm": 0.8120770794656365, "learning_rate": 0.003, "loss": 4.1048, "step": 6167 }, { "epoch": 0.06168, "grad_norm": 0.9461948541503907, "learning_rate": 0.003, "loss": 4.1221, "step": 6168 }, { "epoch": 0.06169, "grad_norm": 0.8911742935865481, "learning_rate": 0.003, "loss": 4.1144, "step": 6169 }, { "epoch": 0.0617, "grad_norm": 0.7323433640480617, "learning_rate": 0.003, "loss": 4.1158, "step": 6170 }, { "epoch": 0.06171, "grad_norm": 0.6208020453162558, "learning_rate": 0.003, "loss": 4.1142, "step": 6171 }, { "epoch": 0.06172, "grad_norm": 0.7291939143005758, "learning_rate": 0.003, "loss": 4.0931, "step": 6172 }, { "epoch": 0.06173, "grad_norm": 0.7913577473386937, "learning_rate": 0.003, "loss": 4.1242, "step": 6173 }, { "epoch": 0.06174, "grad_norm": 0.9778156595493965, "learning_rate": 0.003, "loss": 4.1249, "step": 6174 }, { "epoch": 0.06175, "grad_norm": 1.1932141855839438, "learning_rate": 0.003, "loss": 4.1365, "step": 6175 }, { "epoch": 0.06176, "grad_norm": 0.7326715787881036, "learning_rate": 0.003, "loss": 4.1201, "step": 6176 }, { "epoch": 0.06177, "grad_norm": 0.7923716099483298, "learning_rate": 0.003, "loss": 4.116, "step": 6177 }, { "epoch": 0.06178, "grad_norm": 0.9764360789021225, "learning_rate": 0.003, "loss": 4.1127, "step": 6178 }, { "epoch": 0.06179, "grad_norm": 1.0887682373744165, "learning_rate": 0.003, "loss": 4.135, "step": 6179 }, { "epoch": 0.0618, "grad_norm": 0.8700014538602673, "learning_rate": 0.003, "loss": 4.1276, "step": 6180 }, { "epoch": 0.06181, "grad_norm": 0.8463904835451614, "learning_rate": 0.003, "loss": 4.1436, "step": 6181 }, { "epoch": 0.06182, "grad_norm": 0.8112163844886693, "learning_rate": 0.003, "loss": 4.1031, "step": 6182 }, { "epoch": 0.06183, "grad_norm": 0.8784890732030473, "learning_rate": 0.003, "loss": 4.1191, "step": 6183 }, { "epoch": 0.06184, "grad_norm": 0.8158970894757084, "learning_rate": 0.003, "loss": 4.1197, "step": 6184 }, { "epoch": 0.06185, "grad_norm": 0.8186525569563441, "learning_rate": 0.003, "loss": 4.1077, "step": 6185 }, { "epoch": 0.06186, "grad_norm": 0.8640122378248711, "learning_rate": 0.003, "loss": 4.1353, "step": 6186 }, { "epoch": 0.06187, "grad_norm": 0.9198602507085508, "learning_rate": 0.003, "loss": 4.1429, "step": 6187 }, { "epoch": 0.06188, "grad_norm": 0.9582358948075637, "learning_rate": 0.003, "loss": 4.123, "step": 6188 }, { "epoch": 0.06189, "grad_norm": 0.9436060131627309, "learning_rate": 0.003, "loss": 4.1141, "step": 6189 }, { "epoch": 0.0619, "grad_norm": 0.8106004278533174, "learning_rate": 0.003, "loss": 4.1567, "step": 6190 }, { "epoch": 0.06191, "grad_norm": 0.7377926481554797, "learning_rate": 0.003, "loss": 4.1079, "step": 6191 }, { "epoch": 0.06192, "grad_norm": 0.7292550893414801, "learning_rate": 0.003, "loss": 4.1096, "step": 6192 }, { "epoch": 0.06193, "grad_norm": 0.796675035271722, "learning_rate": 0.003, "loss": 4.0783, "step": 6193 }, { "epoch": 0.06194, "grad_norm": 0.9834328051273128, "learning_rate": 0.003, "loss": 4.1093, "step": 6194 }, { "epoch": 0.06195, "grad_norm": 0.9674422632284655, "learning_rate": 0.003, "loss": 4.127, "step": 6195 }, { "epoch": 0.06196, "grad_norm": 0.9127448537843487, "learning_rate": 0.003, "loss": 4.1387, "step": 6196 }, { "epoch": 0.06197, "grad_norm": 0.8018597016816854, "learning_rate": 0.003, "loss": 4.1196, "step": 6197 }, { "epoch": 0.06198, "grad_norm": 0.6492553167687579, "learning_rate": 0.003, "loss": 4.1469, "step": 6198 }, { "epoch": 0.06199, "grad_norm": 0.500075335938287, "learning_rate": 0.003, "loss": 4.1312, "step": 6199 }, { "epoch": 0.062, "grad_norm": 0.6408031097259872, "learning_rate": 0.003, "loss": 4.1388, "step": 6200 }, { "epoch": 0.06201, "grad_norm": 0.6882374713932283, "learning_rate": 0.003, "loss": 4.1048, "step": 6201 }, { "epoch": 0.06202, "grad_norm": 0.6650459048800811, "learning_rate": 0.003, "loss": 4.1492, "step": 6202 }, { "epoch": 0.06203, "grad_norm": 0.7279713021245527, "learning_rate": 0.003, "loss": 4.1338, "step": 6203 }, { "epoch": 0.06204, "grad_norm": 0.8176381462944218, "learning_rate": 0.003, "loss": 4.1308, "step": 6204 }, { "epoch": 0.06205, "grad_norm": 0.8635078911095838, "learning_rate": 0.003, "loss": 4.1327, "step": 6205 }, { "epoch": 0.06206, "grad_norm": 0.9051460085260756, "learning_rate": 0.003, "loss": 4.1253, "step": 6206 }, { "epoch": 0.06207, "grad_norm": 0.9683554461781614, "learning_rate": 0.003, "loss": 4.0917, "step": 6207 }, { "epoch": 0.06208, "grad_norm": 0.9491242929275403, "learning_rate": 0.003, "loss": 4.1203, "step": 6208 }, { "epoch": 0.06209, "grad_norm": 0.946544007384069, "learning_rate": 0.003, "loss": 4.1078, "step": 6209 }, { "epoch": 0.0621, "grad_norm": 0.7496490508161171, "learning_rate": 0.003, "loss": 4.1606, "step": 6210 }, { "epoch": 0.06211, "grad_norm": 0.7213205165847495, "learning_rate": 0.003, "loss": 4.1221, "step": 6211 }, { "epoch": 0.06212, "grad_norm": 0.7495412802850021, "learning_rate": 0.003, "loss": 4.1295, "step": 6212 }, { "epoch": 0.06213, "grad_norm": 0.7517545861084816, "learning_rate": 0.003, "loss": 4.1064, "step": 6213 }, { "epoch": 0.06214, "grad_norm": 0.8206749672685049, "learning_rate": 0.003, "loss": 4.1092, "step": 6214 }, { "epoch": 0.06215, "grad_norm": 0.8335784823968934, "learning_rate": 0.003, "loss": 4.1283, "step": 6215 }, { "epoch": 0.06216, "grad_norm": 0.8600050966450496, "learning_rate": 0.003, "loss": 4.1572, "step": 6216 }, { "epoch": 0.06217, "grad_norm": 0.7434739467397622, "learning_rate": 0.003, "loss": 4.1152, "step": 6217 }, { "epoch": 0.06218, "grad_norm": 0.7048668827578362, "learning_rate": 0.003, "loss": 4.1013, "step": 6218 }, { "epoch": 0.06219, "grad_norm": 0.772659800309845, "learning_rate": 0.003, "loss": 4.1341, "step": 6219 }, { "epoch": 0.0622, "grad_norm": 0.8655188591453645, "learning_rate": 0.003, "loss": 4.1305, "step": 6220 }, { "epoch": 0.06221, "grad_norm": 0.855208686912549, "learning_rate": 0.003, "loss": 4.1321, "step": 6221 }, { "epoch": 0.06222, "grad_norm": 0.7753969968080353, "learning_rate": 0.003, "loss": 4.1176, "step": 6222 }, { "epoch": 0.06223, "grad_norm": 0.7347873178842909, "learning_rate": 0.003, "loss": 4.1243, "step": 6223 }, { "epoch": 0.06224, "grad_norm": 0.7453826365575011, "learning_rate": 0.003, "loss": 4.095, "step": 6224 }, { "epoch": 0.06225, "grad_norm": 0.6725261598977015, "learning_rate": 0.003, "loss": 4.1197, "step": 6225 }, { "epoch": 0.06226, "grad_norm": 0.7149958935506678, "learning_rate": 0.003, "loss": 4.1017, "step": 6226 }, { "epoch": 0.06227, "grad_norm": 0.6062843965023491, "learning_rate": 0.003, "loss": 4.1101, "step": 6227 }, { "epoch": 0.06228, "grad_norm": 0.5956924427962597, "learning_rate": 0.003, "loss": 4.1163, "step": 6228 }, { "epoch": 0.06229, "grad_norm": 0.49453429887940603, "learning_rate": 0.003, "loss": 4.0981, "step": 6229 }, { "epoch": 0.0623, "grad_norm": 0.4966784851981883, "learning_rate": 0.003, "loss": 4.1103, "step": 6230 }, { "epoch": 0.06231, "grad_norm": 0.5841536691365076, "learning_rate": 0.003, "loss": 4.1017, "step": 6231 }, { "epoch": 0.06232, "grad_norm": 0.6363555647753291, "learning_rate": 0.003, "loss": 4.098, "step": 6232 }, { "epoch": 0.06233, "grad_norm": 0.739340815218559, "learning_rate": 0.003, "loss": 4.0966, "step": 6233 }, { "epoch": 0.06234, "grad_norm": 0.983288242920816, "learning_rate": 0.003, "loss": 4.1054, "step": 6234 }, { "epoch": 0.06235, "grad_norm": 1.256808031451369, "learning_rate": 0.003, "loss": 4.121, "step": 6235 }, { "epoch": 0.06236, "grad_norm": 0.879699004471445, "learning_rate": 0.003, "loss": 4.1267, "step": 6236 }, { "epoch": 0.06237, "grad_norm": 0.7521683358827187, "learning_rate": 0.003, "loss": 4.1184, "step": 6237 }, { "epoch": 0.06238, "grad_norm": 0.9148201380927862, "learning_rate": 0.003, "loss": 4.1211, "step": 6238 }, { "epoch": 0.06239, "grad_norm": 0.9607286756265107, "learning_rate": 0.003, "loss": 4.1067, "step": 6239 }, { "epoch": 0.0624, "grad_norm": 0.9539386861231751, "learning_rate": 0.003, "loss": 4.1235, "step": 6240 }, { "epoch": 0.06241, "grad_norm": 0.9355231782983046, "learning_rate": 0.003, "loss": 4.1058, "step": 6241 }, { "epoch": 0.06242, "grad_norm": 0.9288421332433013, "learning_rate": 0.003, "loss": 4.1331, "step": 6242 }, { "epoch": 0.06243, "grad_norm": 0.8502254135089574, "learning_rate": 0.003, "loss": 4.123, "step": 6243 }, { "epoch": 0.06244, "grad_norm": 0.9732395834043053, "learning_rate": 0.003, "loss": 4.1585, "step": 6244 }, { "epoch": 0.06245, "grad_norm": 1.1071153201796413, "learning_rate": 0.003, "loss": 4.1402, "step": 6245 }, { "epoch": 0.06246, "grad_norm": 1.0940195095069707, "learning_rate": 0.003, "loss": 4.1576, "step": 6246 }, { "epoch": 0.06247, "grad_norm": 0.9412652481549822, "learning_rate": 0.003, "loss": 4.1377, "step": 6247 }, { "epoch": 0.06248, "grad_norm": 0.895195578619345, "learning_rate": 0.003, "loss": 4.136, "step": 6248 }, { "epoch": 0.06249, "grad_norm": 0.9518706201636767, "learning_rate": 0.003, "loss": 4.1519, "step": 6249 }, { "epoch": 0.0625, "grad_norm": 0.9872959824439677, "learning_rate": 0.003, "loss": 4.137, "step": 6250 }, { "epoch": 0.06251, "grad_norm": 0.8928606640381823, "learning_rate": 0.003, "loss": 4.1229, "step": 6251 }, { "epoch": 0.06252, "grad_norm": 0.8613095945713102, "learning_rate": 0.003, "loss": 4.1447, "step": 6252 }, { "epoch": 0.06253, "grad_norm": 0.8745234996707003, "learning_rate": 0.003, "loss": 4.1324, "step": 6253 }, { "epoch": 0.06254, "grad_norm": 0.9945273839471781, "learning_rate": 0.003, "loss": 4.1301, "step": 6254 }, { "epoch": 0.06255, "grad_norm": 0.9317981123576337, "learning_rate": 0.003, "loss": 4.1423, "step": 6255 }, { "epoch": 0.06256, "grad_norm": 0.935398061718941, "learning_rate": 0.003, "loss": 4.1146, "step": 6256 }, { "epoch": 0.06257, "grad_norm": 0.9753404112298208, "learning_rate": 0.003, "loss": 4.095, "step": 6257 }, { "epoch": 0.06258, "grad_norm": 0.7914387634721403, "learning_rate": 0.003, "loss": 4.1434, "step": 6258 }, { "epoch": 0.06259, "grad_norm": 0.8459303686065615, "learning_rate": 0.003, "loss": 4.1596, "step": 6259 }, { "epoch": 0.0626, "grad_norm": 0.8608798731117852, "learning_rate": 0.003, "loss": 4.1331, "step": 6260 }, { "epoch": 0.06261, "grad_norm": 0.9015697577574606, "learning_rate": 0.003, "loss": 4.1183, "step": 6261 }, { "epoch": 0.06262, "grad_norm": 0.8626927231908963, "learning_rate": 0.003, "loss": 4.1227, "step": 6262 }, { "epoch": 0.06263, "grad_norm": 0.8363061035136914, "learning_rate": 0.003, "loss": 4.1492, "step": 6263 }, { "epoch": 0.06264, "grad_norm": 0.870156868679017, "learning_rate": 0.003, "loss": 4.1245, "step": 6264 }, { "epoch": 0.06265, "grad_norm": 0.9555307920316731, "learning_rate": 0.003, "loss": 4.145, "step": 6265 }, { "epoch": 0.06266, "grad_norm": 0.8998498765721897, "learning_rate": 0.003, "loss": 4.1287, "step": 6266 }, { "epoch": 0.06267, "grad_norm": 0.8579446764983243, "learning_rate": 0.003, "loss": 4.1345, "step": 6267 }, { "epoch": 0.06268, "grad_norm": 0.7491708731971836, "learning_rate": 0.003, "loss": 4.1014, "step": 6268 }, { "epoch": 0.06269, "grad_norm": 0.5769598306382601, "learning_rate": 0.003, "loss": 4.1013, "step": 6269 }, { "epoch": 0.0627, "grad_norm": 0.612130695073576, "learning_rate": 0.003, "loss": 4.1176, "step": 6270 }, { "epoch": 0.06271, "grad_norm": 0.5804514862016513, "learning_rate": 0.003, "loss": 4.129, "step": 6271 }, { "epoch": 0.06272, "grad_norm": 0.6768271666465923, "learning_rate": 0.003, "loss": 4.1408, "step": 6272 }, { "epoch": 0.06273, "grad_norm": 0.725620628836043, "learning_rate": 0.003, "loss": 4.1407, "step": 6273 }, { "epoch": 0.06274, "grad_norm": 0.7124587885763234, "learning_rate": 0.003, "loss": 4.1181, "step": 6274 }, { "epoch": 0.06275, "grad_norm": 0.6549941998076714, "learning_rate": 0.003, "loss": 4.1093, "step": 6275 }, { "epoch": 0.06276, "grad_norm": 0.651422535989974, "learning_rate": 0.003, "loss": 4.1216, "step": 6276 }, { "epoch": 0.06277, "grad_norm": 0.7632957652022478, "learning_rate": 0.003, "loss": 4.1264, "step": 6277 }, { "epoch": 0.06278, "grad_norm": 0.8921311551644955, "learning_rate": 0.003, "loss": 4.0826, "step": 6278 }, { "epoch": 0.06279, "grad_norm": 0.9316054360840504, "learning_rate": 0.003, "loss": 4.1418, "step": 6279 }, { "epoch": 0.0628, "grad_norm": 0.9480087249404298, "learning_rate": 0.003, "loss": 4.1147, "step": 6280 }, { "epoch": 0.06281, "grad_norm": 0.9437555976288077, "learning_rate": 0.003, "loss": 4.094, "step": 6281 }, { "epoch": 0.06282, "grad_norm": 0.8181620617485008, "learning_rate": 0.003, "loss": 4.1155, "step": 6282 }, { "epoch": 0.06283, "grad_norm": 0.6530022259763903, "learning_rate": 0.003, "loss": 4.0889, "step": 6283 }, { "epoch": 0.06284, "grad_norm": 0.5914752081352821, "learning_rate": 0.003, "loss": 4.1161, "step": 6284 }, { "epoch": 0.06285, "grad_norm": 0.5833107165282243, "learning_rate": 0.003, "loss": 4.1141, "step": 6285 }, { "epoch": 0.06286, "grad_norm": 0.6373596862276327, "learning_rate": 0.003, "loss": 4.1034, "step": 6286 }, { "epoch": 0.06287, "grad_norm": 0.6834837784355613, "learning_rate": 0.003, "loss": 4.0962, "step": 6287 }, { "epoch": 0.06288, "grad_norm": 0.6535796667468495, "learning_rate": 0.003, "loss": 4.1224, "step": 6288 }, { "epoch": 0.06289, "grad_norm": 0.6174828688423645, "learning_rate": 0.003, "loss": 4.0771, "step": 6289 }, { "epoch": 0.0629, "grad_norm": 0.4959130253790628, "learning_rate": 0.003, "loss": 4.1004, "step": 6290 }, { "epoch": 0.06291, "grad_norm": 0.5595818315925254, "learning_rate": 0.003, "loss": 4.0754, "step": 6291 }, { "epoch": 0.06292, "grad_norm": 0.5248847935016591, "learning_rate": 0.003, "loss": 4.0995, "step": 6292 }, { "epoch": 0.06293, "grad_norm": 0.5063929729505692, "learning_rate": 0.003, "loss": 4.0604, "step": 6293 }, { "epoch": 0.06294, "grad_norm": 0.46785323488692016, "learning_rate": 0.003, "loss": 4.1102, "step": 6294 }, { "epoch": 0.06295, "grad_norm": 0.4202500873613126, "learning_rate": 0.003, "loss": 4.0538, "step": 6295 }, { "epoch": 0.06296, "grad_norm": 0.42143259177649955, "learning_rate": 0.003, "loss": 4.1268, "step": 6296 }, { "epoch": 0.06297, "grad_norm": 0.49833908617620376, "learning_rate": 0.003, "loss": 4.1005, "step": 6297 }, { "epoch": 0.06298, "grad_norm": 0.7251722641880546, "learning_rate": 0.003, "loss": 4.1009, "step": 6298 }, { "epoch": 0.06299, "grad_norm": 1.1796989504352162, "learning_rate": 0.003, "loss": 4.112, "step": 6299 }, { "epoch": 0.063, "grad_norm": 1.0778085487061984, "learning_rate": 0.003, "loss": 4.1599, "step": 6300 }, { "epoch": 0.06301, "grad_norm": 0.890093583413934, "learning_rate": 0.003, "loss": 4.1362, "step": 6301 }, { "epoch": 0.06302, "grad_norm": 0.8849818441147115, "learning_rate": 0.003, "loss": 4.1248, "step": 6302 }, { "epoch": 0.06303, "grad_norm": 0.9016935377264436, "learning_rate": 0.003, "loss": 4.1265, "step": 6303 }, { "epoch": 0.06304, "grad_norm": 0.9975700771133877, "learning_rate": 0.003, "loss": 4.1474, "step": 6304 }, { "epoch": 0.06305, "grad_norm": 1.078399082415521, "learning_rate": 0.003, "loss": 4.1567, "step": 6305 }, { "epoch": 0.06306, "grad_norm": 0.8656165382853225, "learning_rate": 0.003, "loss": 4.1111, "step": 6306 }, { "epoch": 0.06307, "grad_norm": 0.9837508136152356, "learning_rate": 0.003, "loss": 4.113, "step": 6307 }, { "epoch": 0.06308, "grad_norm": 1.0753663335239507, "learning_rate": 0.003, "loss": 4.1403, "step": 6308 }, { "epoch": 0.06309, "grad_norm": 0.9920769784954451, "learning_rate": 0.003, "loss": 4.1273, "step": 6309 }, { "epoch": 0.0631, "grad_norm": 0.8930854451381682, "learning_rate": 0.003, "loss": 4.1275, "step": 6310 }, { "epoch": 0.06311, "grad_norm": 0.9784115790794385, "learning_rate": 0.003, "loss": 4.125, "step": 6311 }, { "epoch": 0.06312, "grad_norm": 0.9052068566792085, "learning_rate": 0.003, "loss": 4.1182, "step": 6312 }, { "epoch": 0.06313, "grad_norm": 0.8062060403102312, "learning_rate": 0.003, "loss": 4.1391, "step": 6313 }, { "epoch": 0.06314, "grad_norm": 0.8612542206730062, "learning_rate": 0.003, "loss": 4.1427, "step": 6314 }, { "epoch": 0.06315, "grad_norm": 1.2349281974671547, "learning_rate": 0.003, "loss": 4.1291, "step": 6315 }, { "epoch": 0.06316, "grad_norm": 0.9748496951834336, "learning_rate": 0.003, "loss": 4.0956, "step": 6316 }, { "epoch": 0.06317, "grad_norm": 0.8725933515560151, "learning_rate": 0.003, "loss": 4.1222, "step": 6317 }, { "epoch": 0.06318, "grad_norm": 0.7900742702838245, "learning_rate": 0.003, "loss": 4.1613, "step": 6318 }, { "epoch": 0.06319, "grad_norm": 0.8185448176929361, "learning_rate": 0.003, "loss": 4.1638, "step": 6319 }, { "epoch": 0.0632, "grad_norm": 0.7867763226056398, "learning_rate": 0.003, "loss": 4.1237, "step": 6320 }, { "epoch": 0.06321, "grad_norm": 0.7397415591963721, "learning_rate": 0.003, "loss": 4.136, "step": 6321 }, { "epoch": 0.06322, "grad_norm": 0.7295018218458792, "learning_rate": 0.003, "loss": 4.1205, "step": 6322 }, { "epoch": 0.06323, "grad_norm": 0.8038126764870634, "learning_rate": 0.003, "loss": 4.1163, "step": 6323 }, { "epoch": 0.06324, "grad_norm": 0.8381105892893556, "learning_rate": 0.003, "loss": 4.1371, "step": 6324 }, { "epoch": 0.06325, "grad_norm": 0.7753841480841397, "learning_rate": 0.003, "loss": 4.1158, "step": 6325 }, { "epoch": 0.06326, "grad_norm": 0.7397370113159383, "learning_rate": 0.003, "loss": 4.1282, "step": 6326 }, { "epoch": 0.06327, "grad_norm": 0.8110748971387649, "learning_rate": 0.003, "loss": 4.0988, "step": 6327 }, { "epoch": 0.06328, "grad_norm": 0.8165737037187061, "learning_rate": 0.003, "loss": 4.1491, "step": 6328 }, { "epoch": 0.06329, "grad_norm": 0.8459755592595166, "learning_rate": 0.003, "loss": 4.1322, "step": 6329 }, { "epoch": 0.0633, "grad_norm": 0.7837172627440523, "learning_rate": 0.003, "loss": 4.1074, "step": 6330 }, { "epoch": 0.06331, "grad_norm": 0.7382526500041345, "learning_rate": 0.003, "loss": 4.1036, "step": 6331 }, { "epoch": 0.06332, "grad_norm": 0.8446347024742388, "learning_rate": 0.003, "loss": 4.1058, "step": 6332 }, { "epoch": 0.06333, "grad_norm": 0.8755392261266487, "learning_rate": 0.003, "loss": 4.1119, "step": 6333 }, { "epoch": 0.06334, "grad_norm": 0.9464338840507761, "learning_rate": 0.003, "loss": 4.1143, "step": 6334 }, { "epoch": 0.06335, "grad_norm": 0.9871255748238474, "learning_rate": 0.003, "loss": 4.0839, "step": 6335 }, { "epoch": 0.06336, "grad_norm": 1.011604006822508, "learning_rate": 0.003, "loss": 4.1484, "step": 6336 }, { "epoch": 0.06337, "grad_norm": 1.054372126242909, "learning_rate": 0.003, "loss": 4.1529, "step": 6337 }, { "epoch": 0.06338, "grad_norm": 0.8149090879744815, "learning_rate": 0.003, "loss": 4.1337, "step": 6338 }, { "epoch": 0.06339, "grad_norm": 0.8045420062877509, "learning_rate": 0.003, "loss": 4.107, "step": 6339 }, { "epoch": 0.0634, "grad_norm": 0.773692028277079, "learning_rate": 0.003, "loss": 4.102, "step": 6340 }, { "epoch": 0.06341, "grad_norm": 0.834869748376275, "learning_rate": 0.003, "loss": 4.1011, "step": 6341 }, { "epoch": 0.06342, "grad_norm": 0.8239421334821241, "learning_rate": 0.003, "loss": 4.1283, "step": 6342 }, { "epoch": 0.06343, "grad_norm": 0.7317579621776237, "learning_rate": 0.003, "loss": 4.1343, "step": 6343 }, { "epoch": 0.06344, "grad_norm": 0.7009311944721036, "learning_rate": 0.003, "loss": 4.0895, "step": 6344 }, { "epoch": 0.06345, "grad_norm": 0.6736306168677829, "learning_rate": 0.003, "loss": 4.1068, "step": 6345 }, { "epoch": 0.06346, "grad_norm": 0.6489992606346455, "learning_rate": 0.003, "loss": 4.1217, "step": 6346 }, { "epoch": 0.06347, "grad_norm": 0.7318992775914919, "learning_rate": 0.003, "loss": 4.1099, "step": 6347 }, { "epoch": 0.06348, "grad_norm": 0.8506294348624643, "learning_rate": 0.003, "loss": 4.1073, "step": 6348 }, { "epoch": 0.06349, "grad_norm": 0.8414498040161846, "learning_rate": 0.003, "loss": 4.1078, "step": 6349 }, { "epoch": 0.0635, "grad_norm": 0.7853415582358513, "learning_rate": 0.003, "loss": 4.0995, "step": 6350 }, { "epoch": 0.06351, "grad_norm": 0.8089386421880802, "learning_rate": 0.003, "loss": 4.1269, "step": 6351 }, { "epoch": 0.06352, "grad_norm": 0.7578096372347558, "learning_rate": 0.003, "loss": 4.1038, "step": 6352 }, { "epoch": 0.06353, "grad_norm": 0.7726928843900696, "learning_rate": 0.003, "loss": 4.1191, "step": 6353 }, { "epoch": 0.06354, "grad_norm": 0.8235268790134066, "learning_rate": 0.003, "loss": 4.1143, "step": 6354 }, { "epoch": 0.06355, "grad_norm": 0.8685123509492527, "learning_rate": 0.003, "loss": 4.124, "step": 6355 }, { "epoch": 0.06356, "grad_norm": 0.9033483279727317, "learning_rate": 0.003, "loss": 4.1175, "step": 6356 }, { "epoch": 0.06357, "grad_norm": 0.8189510174670123, "learning_rate": 0.003, "loss": 4.1338, "step": 6357 }, { "epoch": 0.06358, "grad_norm": 0.7233022129389708, "learning_rate": 0.003, "loss": 4.1001, "step": 6358 }, { "epoch": 0.06359, "grad_norm": 0.591618308364435, "learning_rate": 0.003, "loss": 4.0988, "step": 6359 }, { "epoch": 0.0636, "grad_norm": 0.6724366774358936, "learning_rate": 0.003, "loss": 4.1258, "step": 6360 }, { "epoch": 0.06361, "grad_norm": 0.7710988154121916, "learning_rate": 0.003, "loss": 4.1203, "step": 6361 }, { "epoch": 0.06362, "grad_norm": 0.8624550458659666, "learning_rate": 0.003, "loss": 4.1283, "step": 6362 }, { "epoch": 0.06363, "grad_norm": 0.9165629494523708, "learning_rate": 0.003, "loss": 4.1128, "step": 6363 }, { "epoch": 0.06364, "grad_norm": 0.7471123965901227, "learning_rate": 0.003, "loss": 4.1082, "step": 6364 }, { "epoch": 0.06365, "grad_norm": 0.674364493786534, "learning_rate": 0.003, "loss": 4.0935, "step": 6365 }, { "epoch": 0.06366, "grad_norm": 0.7335045602903685, "learning_rate": 0.003, "loss": 4.1234, "step": 6366 }, { "epoch": 0.06367, "grad_norm": 0.829927307349712, "learning_rate": 0.003, "loss": 4.1067, "step": 6367 }, { "epoch": 0.06368, "grad_norm": 0.9487139825002684, "learning_rate": 0.003, "loss": 4.1467, "step": 6368 }, { "epoch": 0.06369, "grad_norm": 0.911983799114357, "learning_rate": 0.003, "loss": 4.1529, "step": 6369 }, { "epoch": 0.0637, "grad_norm": 1.002706686712587, "learning_rate": 0.003, "loss": 4.1168, "step": 6370 }, { "epoch": 0.06371, "grad_norm": 1.0290249495706973, "learning_rate": 0.003, "loss": 4.1453, "step": 6371 }, { "epoch": 0.06372, "grad_norm": 0.9956241176141073, "learning_rate": 0.003, "loss": 4.1401, "step": 6372 }, { "epoch": 0.06373, "grad_norm": 0.9694674226068865, "learning_rate": 0.003, "loss": 4.1382, "step": 6373 }, { "epoch": 0.06374, "grad_norm": 0.8725376570060053, "learning_rate": 0.003, "loss": 4.0933, "step": 6374 }, { "epoch": 0.06375, "grad_norm": 0.8676467464492347, "learning_rate": 0.003, "loss": 4.1163, "step": 6375 }, { "epoch": 0.06376, "grad_norm": 0.8676286162915308, "learning_rate": 0.003, "loss": 4.1383, "step": 6376 }, { "epoch": 0.06377, "grad_norm": 0.9874746445128965, "learning_rate": 0.003, "loss": 4.1141, "step": 6377 }, { "epoch": 0.06378, "grad_norm": 1.0158658512452112, "learning_rate": 0.003, "loss": 4.1542, "step": 6378 }, { "epoch": 0.06379, "grad_norm": 1.0804519520556228, "learning_rate": 0.003, "loss": 4.1544, "step": 6379 }, { "epoch": 0.0638, "grad_norm": 1.0517174792581394, "learning_rate": 0.003, "loss": 4.1343, "step": 6380 }, { "epoch": 0.06381, "grad_norm": 0.9191882933740307, "learning_rate": 0.003, "loss": 4.1127, "step": 6381 }, { "epoch": 0.06382, "grad_norm": 1.112305209925744, "learning_rate": 0.003, "loss": 4.1259, "step": 6382 }, { "epoch": 0.06383, "grad_norm": 0.887564897445918, "learning_rate": 0.003, "loss": 4.1516, "step": 6383 }, { "epoch": 0.06384, "grad_norm": 0.7444199605679525, "learning_rate": 0.003, "loss": 4.1456, "step": 6384 }, { "epoch": 0.06385, "grad_norm": 0.6861155001402504, "learning_rate": 0.003, "loss": 4.1053, "step": 6385 }, { "epoch": 0.06386, "grad_norm": 0.6803077477027196, "learning_rate": 0.003, "loss": 4.1145, "step": 6386 }, { "epoch": 0.06387, "grad_norm": 0.6585413586302139, "learning_rate": 0.003, "loss": 4.1229, "step": 6387 }, { "epoch": 0.06388, "grad_norm": 0.6526729101850073, "learning_rate": 0.003, "loss": 4.1148, "step": 6388 }, { "epoch": 0.06389, "grad_norm": 0.6961400897039125, "learning_rate": 0.003, "loss": 4.0977, "step": 6389 }, { "epoch": 0.0639, "grad_norm": 0.7971727357460624, "learning_rate": 0.003, "loss": 4.1202, "step": 6390 }, { "epoch": 0.06391, "grad_norm": 0.8100704061167459, "learning_rate": 0.003, "loss": 4.1393, "step": 6391 }, { "epoch": 0.06392, "grad_norm": 0.7878863222547036, "learning_rate": 0.003, "loss": 4.1102, "step": 6392 }, { "epoch": 0.06393, "grad_norm": 0.7832197554643082, "learning_rate": 0.003, "loss": 4.1226, "step": 6393 }, { "epoch": 0.06394, "grad_norm": 0.7676826793962613, "learning_rate": 0.003, "loss": 4.1154, "step": 6394 }, { "epoch": 0.06395, "grad_norm": 0.7865004179204782, "learning_rate": 0.003, "loss": 4.1403, "step": 6395 }, { "epoch": 0.06396, "grad_norm": 0.7481149498310921, "learning_rate": 0.003, "loss": 4.1519, "step": 6396 }, { "epoch": 0.06397, "grad_norm": 0.6578310984328573, "learning_rate": 0.003, "loss": 4.1029, "step": 6397 }, { "epoch": 0.06398, "grad_norm": 0.7814732896391083, "learning_rate": 0.003, "loss": 4.0974, "step": 6398 }, { "epoch": 0.06399, "grad_norm": 0.7910300530341595, "learning_rate": 0.003, "loss": 4.1275, "step": 6399 }, { "epoch": 0.064, "grad_norm": 0.7384277513313977, "learning_rate": 0.003, "loss": 4.1199, "step": 6400 }, { "epoch": 0.06401, "grad_norm": 0.6044786328825004, "learning_rate": 0.003, "loss": 4.0992, "step": 6401 }, { "epoch": 0.06402, "grad_norm": 0.5635787730251838, "learning_rate": 0.003, "loss": 4.1011, "step": 6402 }, { "epoch": 0.06403, "grad_norm": 0.4804036029092371, "learning_rate": 0.003, "loss": 4.0947, "step": 6403 }, { "epoch": 0.06404, "grad_norm": 0.46851547499916696, "learning_rate": 0.003, "loss": 4.1495, "step": 6404 }, { "epoch": 0.06405, "grad_norm": 0.4343792187108702, "learning_rate": 0.003, "loss": 4.1047, "step": 6405 }, { "epoch": 0.06406, "grad_norm": 0.4387967998008624, "learning_rate": 0.003, "loss": 4.0936, "step": 6406 }, { "epoch": 0.06407, "grad_norm": 0.3763862974231301, "learning_rate": 0.003, "loss": 4.0925, "step": 6407 }, { "epoch": 0.06408, "grad_norm": 0.3618832104359289, "learning_rate": 0.003, "loss": 4.0742, "step": 6408 }, { "epoch": 0.06409, "grad_norm": 0.3640424177742778, "learning_rate": 0.003, "loss": 4.1159, "step": 6409 }, { "epoch": 0.0641, "grad_norm": 0.5071096861354427, "learning_rate": 0.003, "loss": 4.1073, "step": 6410 }, { "epoch": 0.06411, "grad_norm": 0.7056594230315663, "learning_rate": 0.003, "loss": 4.1014, "step": 6411 }, { "epoch": 0.06412, "grad_norm": 1.0514700079261672, "learning_rate": 0.003, "loss": 4.1031, "step": 6412 }, { "epoch": 0.06413, "grad_norm": 1.4637822933171232, "learning_rate": 0.003, "loss": 4.1524, "step": 6413 }, { "epoch": 0.06414, "grad_norm": 0.6368174491993011, "learning_rate": 0.003, "loss": 4.0935, "step": 6414 }, { "epoch": 0.06415, "grad_norm": 0.6864691858033185, "learning_rate": 0.003, "loss": 4.0922, "step": 6415 }, { "epoch": 0.06416, "grad_norm": 0.6967129148461769, "learning_rate": 0.003, "loss": 4.13, "step": 6416 }, { "epoch": 0.06417, "grad_norm": 0.7877830132689768, "learning_rate": 0.003, "loss": 4.1027, "step": 6417 }, { "epoch": 0.06418, "grad_norm": 1.1025848418056756, "learning_rate": 0.003, "loss": 4.0996, "step": 6418 }, { "epoch": 0.06419, "grad_norm": 1.1303087225349653, "learning_rate": 0.003, "loss": 4.1096, "step": 6419 }, { "epoch": 0.0642, "grad_norm": 0.8179128267985711, "learning_rate": 0.003, "loss": 4.1156, "step": 6420 }, { "epoch": 0.06421, "grad_norm": 0.7061052538423624, "learning_rate": 0.003, "loss": 4.1237, "step": 6421 }, { "epoch": 0.06422, "grad_norm": 0.6712943095898026, "learning_rate": 0.003, "loss": 4.1112, "step": 6422 }, { "epoch": 0.06423, "grad_norm": 0.7842157557770016, "learning_rate": 0.003, "loss": 4.1304, "step": 6423 }, { "epoch": 0.06424, "grad_norm": 0.8391508667334445, "learning_rate": 0.003, "loss": 4.0946, "step": 6424 }, { "epoch": 0.06425, "grad_norm": 0.8032559980871449, "learning_rate": 0.003, "loss": 4.1154, "step": 6425 }, { "epoch": 0.06426, "grad_norm": 0.842568662503113, "learning_rate": 0.003, "loss": 4.0847, "step": 6426 }, { "epoch": 0.06427, "grad_norm": 0.9659133641186062, "learning_rate": 0.003, "loss": 4.1058, "step": 6427 }, { "epoch": 0.06428, "grad_norm": 0.9129797056099707, "learning_rate": 0.003, "loss": 4.1309, "step": 6428 }, { "epoch": 0.06429, "grad_norm": 0.8807646015484044, "learning_rate": 0.003, "loss": 4.1292, "step": 6429 }, { "epoch": 0.0643, "grad_norm": 0.9116849659316084, "learning_rate": 0.003, "loss": 4.102, "step": 6430 }, { "epoch": 0.06431, "grad_norm": 0.9401397808995475, "learning_rate": 0.003, "loss": 4.1161, "step": 6431 }, { "epoch": 0.06432, "grad_norm": 1.094843463270304, "learning_rate": 0.003, "loss": 4.1145, "step": 6432 }, { "epoch": 0.06433, "grad_norm": 0.97330153788876, "learning_rate": 0.003, "loss": 4.1417, "step": 6433 }, { "epoch": 0.06434, "grad_norm": 0.8801080101022402, "learning_rate": 0.003, "loss": 4.0901, "step": 6434 }, { "epoch": 0.06435, "grad_norm": 0.7207019820857952, "learning_rate": 0.003, "loss": 4.1034, "step": 6435 }, { "epoch": 0.06436, "grad_norm": 0.7301567424764769, "learning_rate": 0.003, "loss": 4.1209, "step": 6436 }, { "epoch": 0.06437, "grad_norm": 0.7242287759939515, "learning_rate": 0.003, "loss": 4.1168, "step": 6437 }, { "epoch": 0.06438, "grad_norm": 0.6183084411636348, "learning_rate": 0.003, "loss": 4.1088, "step": 6438 }, { "epoch": 0.06439, "grad_norm": 0.6942308219572709, "learning_rate": 0.003, "loss": 4.1093, "step": 6439 }, { "epoch": 0.0644, "grad_norm": 0.6502050855985386, "learning_rate": 0.003, "loss": 4.1023, "step": 6440 }, { "epoch": 0.06441, "grad_norm": 0.6169845208363806, "learning_rate": 0.003, "loss": 4.0835, "step": 6441 }, { "epoch": 0.06442, "grad_norm": 0.6761282291621807, "learning_rate": 0.003, "loss": 4.0981, "step": 6442 }, { "epoch": 0.06443, "grad_norm": 0.7049850390444782, "learning_rate": 0.003, "loss": 4.0903, "step": 6443 }, { "epoch": 0.06444, "grad_norm": 0.7982502738327323, "learning_rate": 0.003, "loss": 4.1005, "step": 6444 }, { "epoch": 0.06445, "grad_norm": 0.8849927446065019, "learning_rate": 0.003, "loss": 4.1174, "step": 6445 }, { "epoch": 0.06446, "grad_norm": 0.9290562203347035, "learning_rate": 0.003, "loss": 4.1066, "step": 6446 }, { "epoch": 0.06447, "grad_norm": 0.9155039355826416, "learning_rate": 0.003, "loss": 4.1222, "step": 6447 }, { "epoch": 0.06448, "grad_norm": 0.915311592818687, "learning_rate": 0.003, "loss": 4.1287, "step": 6448 }, { "epoch": 0.06449, "grad_norm": 0.8915234617599778, "learning_rate": 0.003, "loss": 4.1577, "step": 6449 }, { "epoch": 0.0645, "grad_norm": 0.8516263875404035, "learning_rate": 0.003, "loss": 4.1008, "step": 6450 }, { "epoch": 0.06451, "grad_norm": 0.9911826973450533, "learning_rate": 0.003, "loss": 4.1539, "step": 6451 }, { "epoch": 0.06452, "grad_norm": 1.0818621297011708, "learning_rate": 0.003, "loss": 4.1198, "step": 6452 }, { "epoch": 0.06453, "grad_norm": 0.9321332755148247, "learning_rate": 0.003, "loss": 4.1578, "step": 6453 }, { "epoch": 0.06454, "grad_norm": 0.850408071613158, "learning_rate": 0.003, "loss": 4.1097, "step": 6454 }, { "epoch": 0.06455, "grad_norm": 0.8713116964180613, "learning_rate": 0.003, "loss": 4.1244, "step": 6455 }, { "epoch": 0.06456, "grad_norm": 0.7964782772027043, "learning_rate": 0.003, "loss": 4.1358, "step": 6456 }, { "epoch": 0.06457, "grad_norm": 0.8045164932744877, "learning_rate": 0.003, "loss": 4.1174, "step": 6457 }, { "epoch": 0.06458, "grad_norm": 0.8513566837706239, "learning_rate": 0.003, "loss": 4.1279, "step": 6458 }, { "epoch": 0.06459, "grad_norm": 0.8890380816261108, "learning_rate": 0.003, "loss": 4.1399, "step": 6459 }, { "epoch": 0.0646, "grad_norm": 0.9142267341528604, "learning_rate": 0.003, "loss": 4.1163, "step": 6460 }, { "epoch": 0.06461, "grad_norm": 1.1972918407769095, "learning_rate": 0.003, "loss": 4.1696, "step": 6461 }, { "epoch": 0.06462, "grad_norm": 1.0336978547847568, "learning_rate": 0.003, "loss": 4.1123, "step": 6462 }, { "epoch": 0.06463, "grad_norm": 1.0122741524242997, "learning_rate": 0.003, "loss": 4.1167, "step": 6463 }, { "epoch": 0.06464, "grad_norm": 1.0493974717346164, "learning_rate": 0.003, "loss": 4.113, "step": 6464 }, { "epoch": 0.06465, "grad_norm": 0.9438301613214977, "learning_rate": 0.003, "loss": 4.1057, "step": 6465 }, { "epoch": 0.06466, "grad_norm": 0.8498735851960341, "learning_rate": 0.003, "loss": 4.1219, "step": 6466 }, { "epoch": 0.06467, "grad_norm": 0.8190041939424388, "learning_rate": 0.003, "loss": 4.1088, "step": 6467 }, { "epoch": 0.06468, "grad_norm": 0.8403763457939923, "learning_rate": 0.003, "loss": 4.1341, "step": 6468 }, { "epoch": 0.06469, "grad_norm": 0.7964142979953805, "learning_rate": 0.003, "loss": 4.1313, "step": 6469 }, { "epoch": 0.0647, "grad_norm": 0.7347258244702751, "learning_rate": 0.003, "loss": 4.1372, "step": 6470 }, { "epoch": 0.06471, "grad_norm": 0.6129309594714507, "learning_rate": 0.003, "loss": 4.1107, "step": 6471 }, { "epoch": 0.06472, "grad_norm": 0.6076460566383186, "learning_rate": 0.003, "loss": 4.1204, "step": 6472 }, { "epoch": 0.06473, "grad_norm": 0.5891815040105876, "learning_rate": 0.003, "loss": 4.125, "step": 6473 }, { "epoch": 0.06474, "grad_norm": 0.5162733573289747, "learning_rate": 0.003, "loss": 4.1398, "step": 6474 }, { "epoch": 0.06475, "grad_norm": 0.5962513852003754, "learning_rate": 0.003, "loss": 4.1372, "step": 6475 }, { "epoch": 0.06476, "grad_norm": 0.7146539756300717, "learning_rate": 0.003, "loss": 4.1091, "step": 6476 }, { "epoch": 0.06477, "grad_norm": 0.8105712416755565, "learning_rate": 0.003, "loss": 4.0953, "step": 6477 }, { "epoch": 0.06478, "grad_norm": 0.9652008011579472, "learning_rate": 0.003, "loss": 4.088, "step": 6478 }, { "epoch": 0.06479, "grad_norm": 1.1900520557936207, "learning_rate": 0.003, "loss": 4.1137, "step": 6479 }, { "epoch": 0.0648, "grad_norm": 0.7249331917094521, "learning_rate": 0.003, "loss": 4.0984, "step": 6480 }, { "epoch": 0.06481, "grad_norm": 0.6534504012207452, "learning_rate": 0.003, "loss": 4.1079, "step": 6481 }, { "epoch": 0.06482, "grad_norm": 0.7458945563014072, "learning_rate": 0.003, "loss": 4.1061, "step": 6482 }, { "epoch": 0.06483, "grad_norm": 0.909230494347854, "learning_rate": 0.003, "loss": 4.1186, "step": 6483 }, { "epoch": 0.06484, "grad_norm": 0.9786701503465991, "learning_rate": 0.003, "loss": 4.1186, "step": 6484 }, { "epoch": 0.06485, "grad_norm": 0.8056031175729698, "learning_rate": 0.003, "loss": 4.1381, "step": 6485 }, { "epoch": 0.06486, "grad_norm": 0.7315640607458516, "learning_rate": 0.003, "loss": 4.1369, "step": 6486 }, { "epoch": 0.06487, "grad_norm": 0.738123736009974, "learning_rate": 0.003, "loss": 4.124, "step": 6487 }, { "epoch": 0.06488, "grad_norm": 0.7016088617855374, "learning_rate": 0.003, "loss": 4.1303, "step": 6488 }, { "epoch": 0.06489, "grad_norm": 0.7706221605393652, "learning_rate": 0.003, "loss": 4.1173, "step": 6489 }, { "epoch": 0.0649, "grad_norm": 0.8075185716569853, "learning_rate": 0.003, "loss": 4.1072, "step": 6490 }, { "epoch": 0.06491, "grad_norm": 0.7917671230247577, "learning_rate": 0.003, "loss": 4.1121, "step": 6491 }, { "epoch": 0.06492, "grad_norm": 0.851038360366963, "learning_rate": 0.003, "loss": 4.128, "step": 6492 }, { "epoch": 0.06493, "grad_norm": 0.8479749542932367, "learning_rate": 0.003, "loss": 4.1155, "step": 6493 }, { "epoch": 0.06494, "grad_norm": 0.7857228913822375, "learning_rate": 0.003, "loss": 4.1237, "step": 6494 }, { "epoch": 0.06495, "grad_norm": 0.7000090472611841, "learning_rate": 0.003, "loss": 4.1133, "step": 6495 }, { "epoch": 0.06496, "grad_norm": 0.6189238416133499, "learning_rate": 0.003, "loss": 4.0771, "step": 6496 }, { "epoch": 0.06497, "grad_norm": 0.5745909145349337, "learning_rate": 0.003, "loss": 4.1012, "step": 6497 }, { "epoch": 0.06498, "grad_norm": 0.5294157379174294, "learning_rate": 0.003, "loss": 4.0934, "step": 6498 }, { "epoch": 0.06499, "grad_norm": 0.5466886858120876, "learning_rate": 0.003, "loss": 4.1253, "step": 6499 }, { "epoch": 0.065, "grad_norm": 0.6539813137699607, "learning_rate": 0.003, "loss": 4.1022, "step": 6500 }, { "epoch": 0.06501, "grad_norm": 0.6647161266915989, "learning_rate": 0.003, "loss": 4.0973, "step": 6501 }, { "epoch": 0.06502, "grad_norm": 0.6443788622041227, "learning_rate": 0.003, "loss": 4.1054, "step": 6502 }, { "epoch": 0.06503, "grad_norm": 0.7571231315947718, "learning_rate": 0.003, "loss": 4.1221, "step": 6503 }, { "epoch": 0.06504, "grad_norm": 0.9637682445146619, "learning_rate": 0.003, "loss": 4.1063, "step": 6504 }, { "epoch": 0.06505, "grad_norm": 1.3505877682488592, "learning_rate": 0.003, "loss": 4.1153, "step": 6505 }, { "epoch": 0.06506, "grad_norm": 0.7210710726778544, "learning_rate": 0.003, "loss": 4.0924, "step": 6506 }, { "epoch": 0.06507, "grad_norm": 0.8243540456063395, "learning_rate": 0.003, "loss": 4.0706, "step": 6507 }, { "epoch": 0.06508, "grad_norm": 0.9264270578051855, "learning_rate": 0.003, "loss": 4.1089, "step": 6508 }, { "epoch": 0.06509, "grad_norm": 0.8402439484381861, "learning_rate": 0.003, "loss": 4.0974, "step": 6509 }, { "epoch": 0.0651, "grad_norm": 0.8452797474799149, "learning_rate": 0.003, "loss": 4.1355, "step": 6510 }, { "epoch": 0.06511, "grad_norm": 0.8155771099505443, "learning_rate": 0.003, "loss": 4.099, "step": 6511 }, { "epoch": 0.06512, "grad_norm": 0.8504514854470253, "learning_rate": 0.003, "loss": 4.0886, "step": 6512 }, { "epoch": 0.06513, "grad_norm": 0.7996056231914204, "learning_rate": 0.003, "loss": 4.1182, "step": 6513 }, { "epoch": 0.06514, "grad_norm": 0.8483202440944669, "learning_rate": 0.003, "loss": 4.1088, "step": 6514 }, { "epoch": 0.06515, "grad_norm": 0.729647231890123, "learning_rate": 0.003, "loss": 4.0911, "step": 6515 }, { "epoch": 0.06516, "grad_norm": 0.7668262347734042, "learning_rate": 0.003, "loss": 4.1125, "step": 6516 }, { "epoch": 0.06517, "grad_norm": 0.9668803624817275, "learning_rate": 0.003, "loss": 4.1157, "step": 6517 }, { "epoch": 0.06518, "grad_norm": 0.9221337117494466, "learning_rate": 0.003, "loss": 4.1395, "step": 6518 }, { "epoch": 0.06519, "grad_norm": 0.8991712754647655, "learning_rate": 0.003, "loss": 4.1151, "step": 6519 }, { "epoch": 0.0652, "grad_norm": 0.9996762890374128, "learning_rate": 0.003, "loss": 4.0873, "step": 6520 }, { "epoch": 0.06521, "grad_norm": 0.9694147573902756, "learning_rate": 0.003, "loss": 4.1385, "step": 6521 }, { "epoch": 0.06522, "grad_norm": 1.1436487696458615, "learning_rate": 0.003, "loss": 4.1504, "step": 6522 }, { "epoch": 0.06523, "grad_norm": 1.1761311790032607, "learning_rate": 0.003, "loss": 4.1357, "step": 6523 }, { "epoch": 0.06524, "grad_norm": 0.9935715060905626, "learning_rate": 0.003, "loss": 4.1267, "step": 6524 }, { "epoch": 0.06525, "grad_norm": 1.0984134338267004, "learning_rate": 0.003, "loss": 4.1515, "step": 6525 }, { "epoch": 0.06526, "grad_norm": 0.8597157647493189, "learning_rate": 0.003, "loss": 4.1287, "step": 6526 }, { "epoch": 0.06527, "grad_norm": 0.9122953325479415, "learning_rate": 0.003, "loss": 4.1405, "step": 6527 }, { "epoch": 0.06528, "grad_norm": 0.9550047890046722, "learning_rate": 0.003, "loss": 4.1386, "step": 6528 }, { "epoch": 0.06529, "grad_norm": 0.9456502240272384, "learning_rate": 0.003, "loss": 4.1265, "step": 6529 }, { "epoch": 0.0653, "grad_norm": 0.8144448232250512, "learning_rate": 0.003, "loss": 4.1182, "step": 6530 }, { "epoch": 0.06531, "grad_norm": 0.7884345470232, "learning_rate": 0.003, "loss": 4.1073, "step": 6531 }, { "epoch": 0.06532, "grad_norm": 0.7546346553317457, "learning_rate": 0.003, "loss": 4.1361, "step": 6532 }, { "epoch": 0.06533, "grad_norm": 0.6337607386448925, "learning_rate": 0.003, "loss": 4.1334, "step": 6533 }, { "epoch": 0.06534, "grad_norm": 0.6571305549922228, "learning_rate": 0.003, "loss": 4.1256, "step": 6534 }, { "epoch": 0.06535, "grad_norm": 0.6727892441041884, "learning_rate": 0.003, "loss": 4.0859, "step": 6535 }, { "epoch": 0.06536, "grad_norm": 0.6796596200367593, "learning_rate": 0.003, "loss": 4.1024, "step": 6536 }, { "epoch": 0.06537, "grad_norm": 0.6850578598200898, "learning_rate": 0.003, "loss": 4.1186, "step": 6537 }, { "epoch": 0.06538, "grad_norm": 0.6903169559185911, "learning_rate": 0.003, "loss": 4.1232, "step": 6538 }, { "epoch": 0.06539, "grad_norm": 0.8869816313052468, "learning_rate": 0.003, "loss": 4.0958, "step": 6539 }, { "epoch": 0.0654, "grad_norm": 1.168073515327997, "learning_rate": 0.003, "loss": 4.1617, "step": 6540 }, { "epoch": 0.06541, "grad_norm": 0.7182356963906228, "learning_rate": 0.003, "loss": 4.1144, "step": 6541 }, { "epoch": 0.06542, "grad_norm": 0.6188811083162069, "learning_rate": 0.003, "loss": 4.0924, "step": 6542 }, { "epoch": 0.06543, "grad_norm": 0.8051980596249964, "learning_rate": 0.003, "loss": 4.1256, "step": 6543 }, { "epoch": 0.06544, "grad_norm": 0.9132061876425465, "learning_rate": 0.003, "loss": 4.1202, "step": 6544 }, { "epoch": 0.06545, "grad_norm": 0.9233000070732396, "learning_rate": 0.003, "loss": 4.1273, "step": 6545 }, { "epoch": 0.06546, "grad_norm": 0.7831417568832402, "learning_rate": 0.003, "loss": 4.1092, "step": 6546 }, { "epoch": 0.06547, "grad_norm": 0.6532883144447844, "learning_rate": 0.003, "loss": 4.1072, "step": 6547 }, { "epoch": 0.06548, "grad_norm": 0.6727801173157394, "learning_rate": 0.003, "loss": 4.107, "step": 6548 }, { "epoch": 0.06549, "grad_norm": 0.6217127018777375, "learning_rate": 0.003, "loss": 4.1333, "step": 6549 }, { "epoch": 0.0655, "grad_norm": 0.7089420407652894, "learning_rate": 0.003, "loss": 4.0866, "step": 6550 }, { "epoch": 0.06551, "grad_norm": 0.8434472089359629, "learning_rate": 0.003, "loss": 4.093, "step": 6551 }, { "epoch": 0.06552, "grad_norm": 0.957288546390429, "learning_rate": 0.003, "loss": 4.1361, "step": 6552 }, { "epoch": 0.06553, "grad_norm": 0.8538606588662652, "learning_rate": 0.003, "loss": 4.1126, "step": 6553 }, { "epoch": 0.06554, "grad_norm": 0.6821331682783609, "learning_rate": 0.003, "loss": 4.1141, "step": 6554 }, { "epoch": 0.06555, "grad_norm": 0.5970317370800081, "learning_rate": 0.003, "loss": 4.1065, "step": 6555 }, { "epoch": 0.06556, "grad_norm": 0.6339812838968247, "learning_rate": 0.003, "loss": 4.1253, "step": 6556 }, { "epoch": 0.06557, "grad_norm": 0.7300737435789836, "learning_rate": 0.003, "loss": 4.1005, "step": 6557 }, { "epoch": 0.06558, "grad_norm": 0.9340751362463283, "learning_rate": 0.003, "loss": 4.1393, "step": 6558 }, { "epoch": 0.06559, "grad_norm": 1.078857392761123, "learning_rate": 0.003, "loss": 4.1249, "step": 6559 }, { "epoch": 0.0656, "grad_norm": 0.7783300852395647, "learning_rate": 0.003, "loss": 4.1161, "step": 6560 }, { "epoch": 0.06561, "grad_norm": 0.6838855361080844, "learning_rate": 0.003, "loss": 4.1294, "step": 6561 }, { "epoch": 0.06562, "grad_norm": 0.8236313339666234, "learning_rate": 0.003, "loss": 4.0906, "step": 6562 }, { "epoch": 0.06563, "grad_norm": 0.9128215836077087, "learning_rate": 0.003, "loss": 4.113, "step": 6563 }, { "epoch": 0.06564, "grad_norm": 1.0183713945436557, "learning_rate": 0.003, "loss": 4.123, "step": 6564 }, { "epoch": 0.06565, "grad_norm": 0.9875484143170253, "learning_rate": 0.003, "loss": 4.0936, "step": 6565 }, { "epoch": 0.06566, "grad_norm": 0.8482203740641269, "learning_rate": 0.003, "loss": 4.1195, "step": 6566 }, { "epoch": 0.06567, "grad_norm": 0.8887069547124475, "learning_rate": 0.003, "loss": 4.1341, "step": 6567 }, { "epoch": 0.06568, "grad_norm": 1.0056485677045872, "learning_rate": 0.003, "loss": 4.1463, "step": 6568 }, { "epoch": 0.06569, "grad_norm": 0.9064862101322059, "learning_rate": 0.003, "loss": 4.0967, "step": 6569 }, { "epoch": 0.0657, "grad_norm": 0.7555124162684835, "learning_rate": 0.003, "loss": 4.1195, "step": 6570 }, { "epoch": 0.06571, "grad_norm": 0.7348370653563371, "learning_rate": 0.003, "loss": 4.1252, "step": 6571 }, { "epoch": 0.06572, "grad_norm": 0.8152888496093048, "learning_rate": 0.003, "loss": 4.0918, "step": 6572 }, { "epoch": 0.06573, "grad_norm": 0.7572678952633992, "learning_rate": 0.003, "loss": 4.1201, "step": 6573 }, { "epoch": 0.06574, "grad_norm": 0.7173617049609856, "learning_rate": 0.003, "loss": 4.1087, "step": 6574 }, { "epoch": 0.06575, "grad_norm": 0.7216202084765286, "learning_rate": 0.003, "loss": 4.1017, "step": 6575 }, { "epoch": 0.06576, "grad_norm": 0.7321185248183836, "learning_rate": 0.003, "loss": 4.0859, "step": 6576 }, { "epoch": 0.06577, "grad_norm": 0.812337799951959, "learning_rate": 0.003, "loss": 4.0993, "step": 6577 }, { "epoch": 0.06578, "grad_norm": 1.0097566236813522, "learning_rate": 0.003, "loss": 4.1116, "step": 6578 }, { "epoch": 0.06579, "grad_norm": 1.1289060393565276, "learning_rate": 0.003, "loss": 4.1044, "step": 6579 }, { "epoch": 0.0658, "grad_norm": 1.1236108631521293, "learning_rate": 0.003, "loss": 4.0992, "step": 6580 }, { "epoch": 0.06581, "grad_norm": 0.9095208566603907, "learning_rate": 0.003, "loss": 4.1357, "step": 6581 }, { "epoch": 0.06582, "grad_norm": 0.7078312720167307, "learning_rate": 0.003, "loss": 4.1261, "step": 6582 }, { "epoch": 0.06583, "grad_norm": 0.6250796377300359, "learning_rate": 0.003, "loss": 4.0935, "step": 6583 }, { "epoch": 0.06584, "grad_norm": 0.6182361788610296, "learning_rate": 0.003, "loss": 4.0816, "step": 6584 }, { "epoch": 0.06585, "grad_norm": 0.6199101947792578, "learning_rate": 0.003, "loss": 4.1048, "step": 6585 }, { "epoch": 0.06586, "grad_norm": 0.644608198538537, "learning_rate": 0.003, "loss": 4.1162, "step": 6586 }, { "epoch": 0.06587, "grad_norm": 0.7531896129898413, "learning_rate": 0.003, "loss": 4.1225, "step": 6587 }, { "epoch": 0.06588, "grad_norm": 0.9463136908207883, "learning_rate": 0.003, "loss": 4.1466, "step": 6588 }, { "epoch": 0.06589, "grad_norm": 1.0611698029062901, "learning_rate": 0.003, "loss": 4.1384, "step": 6589 }, { "epoch": 0.0659, "grad_norm": 0.9951512526105761, "learning_rate": 0.003, "loss": 4.1488, "step": 6590 }, { "epoch": 0.06591, "grad_norm": 0.9622203108833793, "learning_rate": 0.003, "loss": 4.1735, "step": 6591 }, { "epoch": 0.06592, "grad_norm": 0.7792553434807309, "learning_rate": 0.003, "loss": 4.1311, "step": 6592 }, { "epoch": 0.06593, "grad_norm": 0.7097135195972493, "learning_rate": 0.003, "loss": 4.1091, "step": 6593 }, { "epoch": 0.06594, "grad_norm": 0.7407425345106201, "learning_rate": 0.003, "loss": 4.1303, "step": 6594 }, { "epoch": 0.06595, "grad_norm": 0.7612553261817883, "learning_rate": 0.003, "loss": 4.0834, "step": 6595 }, { "epoch": 0.06596, "grad_norm": 0.7713906419670148, "learning_rate": 0.003, "loss": 4.1343, "step": 6596 }, { "epoch": 0.06597, "grad_norm": 0.95305210338701, "learning_rate": 0.003, "loss": 4.1171, "step": 6597 }, { "epoch": 0.06598, "grad_norm": 0.9912085601243025, "learning_rate": 0.003, "loss": 4.1523, "step": 6598 }, { "epoch": 0.06599, "grad_norm": 0.9563843538129239, "learning_rate": 0.003, "loss": 4.1346, "step": 6599 }, { "epoch": 0.066, "grad_norm": 0.9173651825854763, "learning_rate": 0.003, "loss": 4.1429, "step": 6600 }, { "epoch": 0.06601, "grad_norm": 0.8706761413967128, "learning_rate": 0.003, "loss": 4.1266, "step": 6601 }, { "epoch": 0.06602, "grad_norm": 0.8530638651302136, "learning_rate": 0.003, "loss": 4.1244, "step": 6602 }, { "epoch": 0.06603, "grad_norm": 0.7620877613851017, "learning_rate": 0.003, "loss": 4.1155, "step": 6603 }, { "epoch": 0.06604, "grad_norm": 0.7157412127972154, "learning_rate": 0.003, "loss": 4.1114, "step": 6604 }, { "epoch": 0.06605, "grad_norm": 0.7664275284896481, "learning_rate": 0.003, "loss": 4.1325, "step": 6605 }, { "epoch": 0.06606, "grad_norm": 0.8596918832087841, "learning_rate": 0.003, "loss": 4.119, "step": 6606 }, { "epoch": 0.06607, "grad_norm": 0.9733248442463999, "learning_rate": 0.003, "loss": 4.1112, "step": 6607 }, { "epoch": 0.06608, "grad_norm": 1.261129457668947, "learning_rate": 0.003, "loss": 4.1147, "step": 6608 }, { "epoch": 0.06609, "grad_norm": 0.7875463153702758, "learning_rate": 0.003, "loss": 4.123, "step": 6609 }, { "epoch": 0.0661, "grad_norm": 0.6418585115175501, "learning_rate": 0.003, "loss": 4.1225, "step": 6610 }, { "epoch": 0.06611, "grad_norm": 0.6118831582806695, "learning_rate": 0.003, "loss": 4.1074, "step": 6611 }, { "epoch": 0.06612, "grad_norm": 0.6131198639857838, "learning_rate": 0.003, "loss": 4.1031, "step": 6612 }, { "epoch": 0.06613, "grad_norm": 0.605891802900793, "learning_rate": 0.003, "loss": 4.1184, "step": 6613 }, { "epoch": 0.06614, "grad_norm": 0.7122334128472662, "learning_rate": 0.003, "loss": 4.1152, "step": 6614 }, { "epoch": 0.06615, "grad_norm": 0.986281825824359, "learning_rate": 0.003, "loss": 4.1009, "step": 6615 }, { "epoch": 0.06616, "grad_norm": 1.209509744232019, "learning_rate": 0.003, "loss": 4.1629, "step": 6616 }, { "epoch": 0.06617, "grad_norm": 0.589186192111392, "learning_rate": 0.003, "loss": 4.126, "step": 6617 }, { "epoch": 0.06618, "grad_norm": 0.817010663602591, "learning_rate": 0.003, "loss": 4.1429, "step": 6618 }, { "epoch": 0.06619, "grad_norm": 1.1163167401139134, "learning_rate": 0.003, "loss": 4.1143, "step": 6619 }, { "epoch": 0.0662, "grad_norm": 0.861557189675986, "learning_rate": 0.003, "loss": 4.101, "step": 6620 }, { "epoch": 0.06621, "grad_norm": 0.7734302388169766, "learning_rate": 0.003, "loss": 4.1324, "step": 6621 }, { "epoch": 0.06622, "grad_norm": 0.6681446519688905, "learning_rate": 0.003, "loss": 4.1237, "step": 6622 }, { "epoch": 0.06623, "grad_norm": 0.6086441119859902, "learning_rate": 0.003, "loss": 4.1052, "step": 6623 }, { "epoch": 0.06624, "grad_norm": 0.6161665012708084, "learning_rate": 0.003, "loss": 4.126, "step": 6624 }, { "epoch": 0.06625, "grad_norm": 0.6016793043105603, "learning_rate": 0.003, "loss": 4.1109, "step": 6625 }, { "epoch": 0.06626, "grad_norm": 0.5731558676794132, "learning_rate": 0.003, "loss": 4.129, "step": 6626 }, { "epoch": 0.06627, "grad_norm": 0.5748801713145845, "learning_rate": 0.003, "loss": 4.1173, "step": 6627 }, { "epoch": 0.06628, "grad_norm": 0.5724674668619956, "learning_rate": 0.003, "loss": 4.1197, "step": 6628 }, { "epoch": 0.06629, "grad_norm": 0.5329925465369826, "learning_rate": 0.003, "loss": 4.1118, "step": 6629 }, { "epoch": 0.0663, "grad_norm": 0.4945919230759341, "learning_rate": 0.003, "loss": 4.1287, "step": 6630 }, { "epoch": 0.06631, "grad_norm": 0.45658695378096065, "learning_rate": 0.003, "loss": 4.1026, "step": 6631 }, { "epoch": 0.06632, "grad_norm": 0.526478004374339, "learning_rate": 0.003, "loss": 4.1042, "step": 6632 }, { "epoch": 0.06633, "grad_norm": 0.6229089968891325, "learning_rate": 0.003, "loss": 4.1235, "step": 6633 }, { "epoch": 0.06634, "grad_norm": 0.7440020938095453, "learning_rate": 0.003, "loss": 4.1129, "step": 6634 }, { "epoch": 0.06635, "grad_norm": 1.0497670831766566, "learning_rate": 0.003, "loss": 4.1378, "step": 6635 }, { "epoch": 0.06636, "grad_norm": 1.14053551502134, "learning_rate": 0.003, "loss": 4.0949, "step": 6636 }, { "epoch": 0.06637, "grad_norm": 0.7502849737062908, "learning_rate": 0.003, "loss": 4.112, "step": 6637 }, { "epoch": 0.06638, "grad_norm": 0.8150413391233561, "learning_rate": 0.003, "loss": 4.0846, "step": 6638 }, { "epoch": 0.06639, "grad_norm": 0.9898719461642121, "learning_rate": 0.003, "loss": 4.1258, "step": 6639 }, { "epoch": 0.0664, "grad_norm": 1.1511766317227263, "learning_rate": 0.003, "loss": 4.1092, "step": 6640 }, { "epoch": 0.06641, "grad_norm": 0.8614640565436265, "learning_rate": 0.003, "loss": 4.1251, "step": 6641 }, { "epoch": 0.06642, "grad_norm": 0.7977417905197698, "learning_rate": 0.003, "loss": 4.1256, "step": 6642 }, { "epoch": 0.06643, "grad_norm": 0.7780801354815946, "learning_rate": 0.003, "loss": 4.114, "step": 6643 }, { "epoch": 0.06644, "grad_norm": 0.7755477432783495, "learning_rate": 0.003, "loss": 4.1157, "step": 6644 }, { "epoch": 0.06645, "grad_norm": 0.9119617892094356, "learning_rate": 0.003, "loss": 4.1069, "step": 6645 }, { "epoch": 0.06646, "grad_norm": 0.878567897301363, "learning_rate": 0.003, "loss": 4.1099, "step": 6646 }, { "epoch": 0.06647, "grad_norm": 0.8867140954494283, "learning_rate": 0.003, "loss": 4.1405, "step": 6647 }, { "epoch": 0.06648, "grad_norm": 0.8336313880890538, "learning_rate": 0.003, "loss": 4.1093, "step": 6648 }, { "epoch": 0.06649, "grad_norm": 0.8170814645370212, "learning_rate": 0.003, "loss": 4.1201, "step": 6649 }, { "epoch": 0.0665, "grad_norm": 0.7834859433585468, "learning_rate": 0.003, "loss": 4.1168, "step": 6650 }, { "epoch": 0.06651, "grad_norm": 0.8357692571639677, "learning_rate": 0.003, "loss": 4.0844, "step": 6651 }, { "epoch": 0.06652, "grad_norm": 0.9187801035749852, "learning_rate": 0.003, "loss": 4.1187, "step": 6652 }, { "epoch": 0.06653, "grad_norm": 0.969813469239403, "learning_rate": 0.003, "loss": 4.1504, "step": 6653 }, { "epoch": 0.06654, "grad_norm": 1.0521703710878825, "learning_rate": 0.003, "loss": 4.1142, "step": 6654 }, { "epoch": 0.06655, "grad_norm": 1.115077498474384, "learning_rate": 0.003, "loss": 4.133, "step": 6655 }, { "epoch": 0.06656, "grad_norm": 0.9993667641705132, "learning_rate": 0.003, "loss": 4.1122, "step": 6656 }, { "epoch": 0.06657, "grad_norm": 1.1623793190867178, "learning_rate": 0.003, "loss": 4.138, "step": 6657 }, { "epoch": 0.06658, "grad_norm": 0.8317171031791138, "learning_rate": 0.003, "loss": 4.1207, "step": 6658 }, { "epoch": 0.06659, "grad_norm": 0.7417282289820908, "learning_rate": 0.003, "loss": 4.1346, "step": 6659 }, { "epoch": 0.0666, "grad_norm": 0.7996438420969371, "learning_rate": 0.003, "loss": 4.1154, "step": 6660 }, { "epoch": 0.06661, "grad_norm": 0.8239468144667403, "learning_rate": 0.003, "loss": 4.115, "step": 6661 }, { "epoch": 0.06662, "grad_norm": 0.8636078960543694, "learning_rate": 0.003, "loss": 4.1075, "step": 6662 }, { "epoch": 0.06663, "grad_norm": 0.8286265545981787, "learning_rate": 0.003, "loss": 4.1754, "step": 6663 }, { "epoch": 0.06664, "grad_norm": 0.7760590887675644, "learning_rate": 0.003, "loss": 4.1319, "step": 6664 }, { "epoch": 0.06665, "grad_norm": 0.6944243205061547, "learning_rate": 0.003, "loss": 4.1548, "step": 6665 }, { "epoch": 0.06666, "grad_norm": 0.7180657446089408, "learning_rate": 0.003, "loss": 4.0845, "step": 6666 }, { "epoch": 0.06667, "grad_norm": 0.8974376505212266, "learning_rate": 0.003, "loss": 4.1154, "step": 6667 }, { "epoch": 0.06668, "grad_norm": 1.0172055502989037, "learning_rate": 0.003, "loss": 4.1119, "step": 6668 }, { "epoch": 0.06669, "grad_norm": 1.1607157063808713, "learning_rate": 0.003, "loss": 4.1234, "step": 6669 }, { "epoch": 0.0667, "grad_norm": 0.6558890868526093, "learning_rate": 0.003, "loss": 4.1248, "step": 6670 }, { "epoch": 0.06671, "grad_norm": 0.5339830424514167, "learning_rate": 0.003, "loss": 4.1163, "step": 6671 }, { "epoch": 0.06672, "grad_norm": 0.6960933681548969, "learning_rate": 0.003, "loss": 4.1, "step": 6672 }, { "epoch": 0.06673, "grad_norm": 0.8613148280885923, "learning_rate": 0.003, "loss": 4.1033, "step": 6673 }, { "epoch": 0.06674, "grad_norm": 1.0944320576632818, "learning_rate": 0.003, "loss": 4.136, "step": 6674 }, { "epoch": 0.06675, "grad_norm": 0.8624252033051901, "learning_rate": 0.003, "loss": 4.0746, "step": 6675 }, { "epoch": 0.06676, "grad_norm": 0.6153729593690301, "learning_rate": 0.003, "loss": 4.1197, "step": 6676 }, { "epoch": 0.06677, "grad_norm": 0.6138367390296136, "learning_rate": 0.003, "loss": 4.1283, "step": 6677 }, { "epoch": 0.06678, "grad_norm": 0.6766082571603963, "learning_rate": 0.003, "loss": 4.1348, "step": 6678 }, { "epoch": 0.06679, "grad_norm": 0.7509125610237447, "learning_rate": 0.003, "loss": 4.1065, "step": 6679 }, { "epoch": 0.0668, "grad_norm": 0.7800006560160819, "learning_rate": 0.003, "loss": 4.1423, "step": 6680 }, { "epoch": 0.06681, "grad_norm": 0.7276956103034433, "learning_rate": 0.003, "loss": 4.0959, "step": 6681 }, { "epoch": 0.06682, "grad_norm": 0.7677356222531484, "learning_rate": 0.003, "loss": 4.1246, "step": 6682 }, { "epoch": 0.06683, "grad_norm": 0.9103583128182188, "learning_rate": 0.003, "loss": 4.1183, "step": 6683 }, { "epoch": 0.06684, "grad_norm": 0.9184243344640678, "learning_rate": 0.003, "loss": 4.1264, "step": 6684 }, { "epoch": 0.06685, "grad_norm": 0.8953119859552124, "learning_rate": 0.003, "loss": 4.1215, "step": 6685 }, { "epoch": 0.06686, "grad_norm": 0.8649576016834076, "learning_rate": 0.003, "loss": 4.1023, "step": 6686 }, { "epoch": 0.06687, "grad_norm": 0.8737336563045488, "learning_rate": 0.003, "loss": 4.0922, "step": 6687 }, { "epoch": 0.06688, "grad_norm": 0.8001454108494693, "learning_rate": 0.003, "loss": 4.1044, "step": 6688 }, { "epoch": 0.06689, "grad_norm": 0.7007608354845569, "learning_rate": 0.003, "loss": 4.1377, "step": 6689 }, { "epoch": 0.0669, "grad_norm": 0.7346334248821653, "learning_rate": 0.003, "loss": 4.1024, "step": 6690 }, { "epoch": 0.06691, "grad_norm": 0.8242428244307782, "learning_rate": 0.003, "loss": 4.1227, "step": 6691 }, { "epoch": 0.06692, "grad_norm": 0.8785059768462102, "learning_rate": 0.003, "loss": 4.1006, "step": 6692 }, { "epoch": 0.06693, "grad_norm": 0.8419120527636385, "learning_rate": 0.003, "loss": 4.116, "step": 6693 }, { "epoch": 0.06694, "grad_norm": 0.8364345690497847, "learning_rate": 0.003, "loss": 4.112, "step": 6694 }, { "epoch": 0.06695, "grad_norm": 0.7282086608438694, "learning_rate": 0.003, "loss": 4.1101, "step": 6695 }, { "epoch": 0.06696, "grad_norm": 0.7156637313733002, "learning_rate": 0.003, "loss": 4.1117, "step": 6696 }, { "epoch": 0.06697, "grad_norm": 0.759716727374639, "learning_rate": 0.003, "loss": 4.1229, "step": 6697 }, { "epoch": 0.06698, "grad_norm": 0.6814818122358287, "learning_rate": 0.003, "loss": 4.1075, "step": 6698 }, { "epoch": 0.06699, "grad_norm": 0.8881481031361592, "learning_rate": 0.003, "loss": 4.1118, "step": 6699 }, { "epoch": 0.067, "grad_norm": 0.916609985935385, "learning_rate": 0.003, "loss": 4.1344, "step": 6700 }, { "epoch": 0.06701, "grad_norm": 0.7647456152832255, "learning_rate": 0.003, "loss": 4.1261, "step": 6701 }, { "epoch": 0.06702, "grad_norm": 0.9878626082963966, "learning_rate": 0.003, "loss": 4.1189, "step": 6702 }, { "epoch": 0.06703, "grad_norm": 1.450715682692916, "learning_rate": 0.003, "loss": 4.1498, "step": 6703 }, { "epoch": 0.06704, "grad_norm": 0.8099658530003969, "learning_rate": 0.003, "loss": 4.1068, "step": 6704 }, { "epoch": 0.06705, "grad_norm": 0.8003246468539195, "learning_rate": 0.003, "loss": 4.114, "step": 6705 }, { "epoch": 0.06706, "grad_norm": 0.9277530689587093, "learning_rate": 0.003, "loss": 4.165, "step": 6706 }, { "epoch": 0.06707, "grad_norm": 1.081334232913671, "learning_rate": 0.003, "loss": 4.1046, "step": 6707 }, { "epoch": 0.06708, "grad_norm": 0.7824655188582949, "learning_rate": 0.003, "loss": 4.1134, "step": 6708 }, { "epoch": 0.06709, "grad_norm": 0.6810909588960559, "learning_rate": 0.003, "loss": 4.1104, "step": 6709 }, { "epoch": 0.0671, "grad_norm": 0.6175677027859562, "learning_rate": 0.003, "loss": 4.131, "step": 6710 }, { "epoch": 0.06711, "grad_norm": 0.7248156251811534, "learning_rate": 0.003, "loss": 4.1407, "step": 6711 }, { "epoch": 0.06712, "grad_norm": 0.9236044790837477, "learning_rate": 0.003, "loss": 4.0971, "step": 6712 }, { "epoch": 0.06713, "grad_norm": 1.1656738764307082, "learning_rate": 0.003, "loss": 4.1193, "step": 6713 }, { "epoch": 0.06714, "grad_norm": 0.687130908864893, "learning_rate": 0.003, "loss": 4.1121, "step": 6714 }, { "epoch": 0.06715, "grad_norm": 0.6511308300062618, "learning_rate": 0.003, "loss": 4.1105, "step": 6715 }, { "epoch": 0.06716, "grad_norm": 0.9028221528538252, "learning_rate": 0.003, "loss": 4.1228, "step": 6716 }, { "epoch": 0.06717, "grad_norm": 0.9804042876410267, "learning_rate": 0.003, "loss": 4.1445, "step": 6717 }, { "epoch": 0.06718, "grad_norm": 1.0173300549817423, "learning_rate": 0.003, "loss": 4.1064, "step": 6718 }, { "epoch": 0.06719, "grad_norm": 0.9220403787562276, "learning_rate": 0.003, "loss": 4.1076, "step": 6719 }, { "epoch": 0.0672, "grad_norm": 0.7514745228001316, "learning_rate": 0.003, "loss": 4.1207, "step": 6720 }, { "epoch": 0.06721, "grad_norm": 0.8665408867149632, "learning_rate": 0.003, "loss": 4.134, "step": 6721 }, { "epoch": 0.06722, "grad_norm": 0.9302053283399239, "learning_rate": 0.003, "loss": 4.1493, "step": 6722 }, { "epoch": 0.06723, "grad_norm": 0.9301656966667754, "learning_rate": 0.003, "loss": 4.1395, "step": 6723 }, { "epoch": 0.06724, "grad_norm": 0.8389657280038182, "learning_rate": 0.003, "loss": 4.099, "step": 6724 }, { "epoch": 0.06725, "grad_norm": 0.7512173727545822, "learning_rate": 0.003, "loss": 4.1299, "step": 6725 }, { "epoch": 0.06726, "grad_norm": 0.7891627577920859, "learning_rate": 0.003, "loss": 4.1563, "step": 6726 }, { "epoch": 0.06727, "grad_norm": 0.9042190562513309, "learning_rate": 0.003, "loss": 4.1221, "step": 6727 }, { "epoch": 0.06728, "grad_norm": 1.0541308359487462, "learning_rate": 0.003, "loss": 4.0975, "step": 6728 }, { "epoch": 0.06729, "grad_norm": 1.0843675552428607, "learning_rate": 0.003, "loss": 4.1202, "step": 6729 }, { "epoch": 0.0673, "grad_norm": 1.0045820679439086, "learning_rate": 0.003, "loss": 4.128, "step": 6730 }, { "epoch": 0.06731, "grad_norm": 0.9002591648778789, "learning_rate": 0.003, "loss": 4.1006, "step": 6731 }, { "epoch": 0.06732, "grad_norm": 0.646912864765862, "learning_rate": 0.003, "loss": 4.1142, "step": 6732 }, { "epoch": 0.06733, "grad_norm": 0.7141084225358552, "learning_rate": 0.003, "loss": 4.1284, "step": 6733 }, { "epoch": 0.06734, "grad_norm": 0.7401896537351272, "learning_rate": 0.003, "loss": 4.1231, "step": 6734 }, { "epoch": 0.06735, "grad_norm": 0.716630242459992, "learning_rate": 0.003, "loss": 4.1002, "step": 6735 }, { "epoch": 0.06736, "grad_norm": 0.8346796605661073, "learning_rate": 0.003, "loss": 4.1056, "step": 6736 }, { "epoch": 0.06737, "grad_norm": 0.8558768913961102, "learning_rate": 0.003, "loss": 4.1144, "step": 6737 }, { "epoch": 0.06738, "grad_norm": 0.8608671123963944, "learning_rate": 0.003, "loss": 4.1098, "step": 6738 }, { "epoch": 0.06739, "grad_norm": 0.9596153104235727, "learning_rate": 0.003, "loss": 4.1182, "step": 6739 }, { "epoch": 0.0674, "grad_norm": 0.937939190033793, "learning_rate": 0.003, "loss": 4.1145, "step": 6740 }, { "epoch": 0.06741, "grad_norm": 0.7552811761560235, "learning_rate": 0.003, "loss": 4.0981, "step": 6741 }, { "epoch": 0.06742, "grad_norm": 0.650352878706116, "learning_rate": 0.003, "loss": 4.12, "step": 6742 }, { "epoch": 0.06743, "grad_norm": 0.6887739825416035, "learning_rate": 0.003, "loss": 4.0913, "step": 6743 }, { "epoch": 0.06744, "grad_norm": 0.6459118608591984, "learning_rate": 0.003, "loss": 4.1247, "step": 6744 }, { "epoch": 0.06745, "grad_norm": 0.7004216000552352, "learning_rate": 0.003, "loss": 4.0962, "step": 6745 }, { "epoch": 0.06746, "grad_norm": 0.6099448791242965, "learning_rate": 0.003, "loss": 4.1021, "step": 6746 }, { "epoch": 0.06747, "grad_norm": 0.6988500937390398, "learning_rate": 0.003, "loss": 4.1065, "step": 6747 }, { "epoch": 0.06748, "grad_norm": 0.7156433755323733, "learning_rate": 0.003, "loss": 4.1094, "step": 6748 }, { "epoch": 0.06749, "grad_norm": 0.801421918564714, "learning_rate": 0.003, "loss": 4.1034, "step": 6749 }, { "epoch": 0.0675, "grad_norm": 0.9888166246308364, "learning_rate": 0.003, "loss": 4.1015, "step": 6750 }, { "epoch": 0.06751, "grad_norm": 0.9741131169646507, "learning_rate": 0.003, "loss": 4.1407, "step": 6751 }, { "epoch": 0.06752, "grad_norm": 0.8319653111077174, "learning_rate": 0.003, "loss": 4.1293, "step": 6752 }, { "epoch": 0.06753, "grad_norm": 0.7778459734702268, "learning_rate": 0.003, "loss": 4.0881, "step": 6753 }, { "epoch": 0.06754, "grad_norm": 0.6877593644712623, "learning_rate": 0.003, "loss": 4.1132, "step": 6754 }, { "epoch": 0.06755, "grad_norm": 0.5706976203238855, "learning_rate": 0.003, "loss": 4.0969, "step": 6755 }, { "epoch": 0.06756, "grad_norm": 0.6454631853072159, "learning_rate": 0.003, "loss": 4.1147, "step": 6756 }, { "epoch": 0.06757, "grad_norm": 0.7487310221163939, "learning_rate": 0.003, "loss": 4.119, "step": 6757 }, { "epoch": 0.06758, "grad_norm": 0.9383683479121699, "learning_rate": 0.003, "loss": 4.1131, "step": 6758 }, { "epoch": 0.06759, "grad_norm": 1.2488321237213988, "learning_rate": 0.003, "loss": 4.152, "step": 6759 }, { "epoch": 0.0676, "grad_norm": 0.8044219051814184, "learning_rate": 0.003, "loss": 4.1233, "step": 6760 }, { "epoch": 0.06761, "grad_norm": 0.6504031656659648, "learning_rate": 0.003, "loss": 4.1028, "step": 6761 }, { "epoch": 0.06762, "grad_norm": 0.5974848351395243, "learning_rate": 0.003, "loss": 4.1069, "step": 6762 }, { "epoch": 0.06763, "grad_norm": 0.7658766240767714, "learning_rate": 0.003, "loss": 4.1079, "step": 6763 }, { "epoch": 0.06764, "grad_norm": 1.0816158497707196, "learning_rate": 0.003, "loss": 4.1001, "step": 6764 }, { "epoch": 0.06765, "grad_norm": 1.0931948991927563, "learning_rate": 0.003, "loss": 4.1191, "step": 6765 }, { "epoch": 0.06766, "grad_norm": 0.7550131909735355, "learning_rate": 0.003, "loss": 4.1244, "step": 6766 }, { "epoch": 0.06767, "grad_norm": 0.6220201794198699, "learning_rate": 0.003, "loss": 4.1212, "step": 6767 }, { "epoch": 0.06768, "grad_norm": 0.7460575761496198, "learning_rate": 0.003, "loss": 4.0848, "step": 6768 }, { "epoch": 0.06769, "grad_norm": 0.7893758885707454, "learning_rate": 0.003, "loss": 4.0993, "step": 6769 }, { "epoch": 0.0677, "grad_norm": 0.7505537103644507, "learning_rate": 0.003, "loss": 4.1165, "step": 6770 }, { "epoch": 0.06771, "grad_norm": 0.7025624171913118, "learning_rate": 0.003, "loss": 4.0995, "step": 6771 }, { "epoch": 0.06772, "grad_norm": 0.7560527883448278, "learning_rate": 0.003, "loss": 4.1098, "step": 6772 }, { "epoch": 0.06773, "grad_norm": 0.7196524539900757, "learning_rate": 0.003, "loss": 4.1252, "step": 6773 }, { "epoch": 0.06774, "grad_norm": 0.7389219268859994, "learning_rate": 0.003, "loss": 4.062, "step": 6774 }, { "epoch": 0.06775, "grad_norm": 0.6685763528477642, "learning_rate": 0.003, "loss": 4.1096, "step": 6775 }, { "epoch": 0.06776, "grad_norm": 0.5686448643650356, "learning_rate": 0.003, "loss": 4.1352, "step": 6776 }, { "epoch": 0.06777, "grad_norm": 0.5856469192051172, "learning_rate": 0.003, "loss": 4.1145, "step": 6777 }, { "epoch": 0.06778, "grad_norm": 0.5829647709549176, "learning_rate": 0.003, "loss": 4.0617, "step": 6778 }, { "epoch": 0.06779, "grad_norm": 0.58878250453052, "learning_rate": 0.003, "loss": 4.1078, "step": 6779 }, { "epoch": 0.0678, "grad_norm": 0.6705441528405734, "learning_rate": 0.003, "loss": 4.0718, "step": 6780 }, { "epoch": 0.06781, "grad_norm": 0.694256508354405, "learning_rate": 0.003, "loss": 4.1373, "step": 6781 }, { "epoch": 0.06782, "grad_norm": 0.7836721563560676, "learning_rate": 0.003, "loss": 4.1166, "step": 6782 }, { "epoch": 0.06783, "grad_norm": 0.9605929520599132, "learning_rate": 0.003, "loss": 4.1272, "step": 6783 }, { "epoch": 0.06784, "grad_norm": 0.9687369923954169, "learning_rate": 0.003, "loss": 4.1023, "step": 6784 }, { "epoch": 0.06785, "grad_norm": 0.8611250521831618, "learning_rate": 0.003, "loss": 4.1229, "step": 6785 }, { "epoch": 0.06786, "grad_norm": 0.8150784557873674, "learning_rate": 0.003, "loss": 4.0913, "step": 6786 }, { "epoch": 0.06787, "grad_norm": 0.7294549203147801, "learning_rate": 0.003, "loss": 4.0754, "step": 6787 }, { "epoch": 0.06788, "grad_norm": 0.7384718901245526, "learning_rate": 0.003, "loss": 4.1238, "step": 6788 }, { "epoch": 0.06789, "grad_norm": 0.7373796160060775, "learning_rate": 0.003, "loss": 4.0898, "step": 6789 }, { "epoch": 0.0679, "grad_norm": 0.7304803645302286, "learning_rate": 0.003, "loss": 4.1154, "step": 6790 }, { "epoch": 0.06791, "grad_norm": 0.7119811531380004, "learning_rate": 0.003, "loss": 4.0989, "step": 6791 }, { "epoch": 0.06792, "grad_norm": 0.8288761752825445, "learning_rate": 0.003, "loss": 4.122, "step": 6792 }, { "epoch": 0.06793, "grad_norm": 0.7074297261709009, "learning_rate": 0.003, "loss": 4.1159, "step": 6793 }, { "epoch": 0.06794, "grad_norm": 0.6917103888708278, "learning_rate": 0.003, "loss": 4.1297, "step": 6794 }, { "epoch": 0.06795, "grad_norm": 0.9056603235821777, "learning_rate": 0.003, "loss": 4.0846, "step": 6795 }, { "epoch": 0.06796, "grad_norm": 1.2619528323809053, "learning_rate": 0.003, "loss": 4.1357, "step": 6796 }, { "epoch": 0.06797, "grad_norm": 0.9779295543791401, "learning_rate": 0.003, "loss": 4.1424, "step": 6797 }, { "epoch": 0.06798, "grad_norm": 1.0466081925100106, "learning_rate": 0.003, "loss": 4.1134, "step": 6798 }, { "epoch": 0.06799, "grad_norm": 1.0487678592502534, "learning_rate": 0.003, "loss": 4.153, "step": 6799 }, { "epoch": 0.068, "grad_norm": 0.9892002508567624, "learning_rate": 0.003, "loss": 4.1263, "step": 6800 }, { "epoch": 0.06801, "grad_norm": 0.9771195211817344, "learning_rate": 0.003, "loss": 4.1038, "step": 6801 }, { "epoch": 0.06802, "grad_norm": 0.9110534102852377, "learning_rate": 0.003, "loss": 4.1182, "step": 6802 }, { "epoch": 0.06803, "grad_norm": 0.9513266489274651, "learning_rate": 0.003, "loss": 4.1217, "step": 6803 }, { "epoch": 0.06804, "grad_norm": 1.1138225723663524, "learning_rate": 0.003, "loss": 4.1412, "step": 6804 }, { "epoch": 0.06805, "grad_norm": 1.0317572052095008, "learning_rate": 0.003, "loss": 4.1185, "step": 6805 }, { "epoch": 0.06806, "grad_norm": 0.797666875905461, "learning_rate": 0.003, "loss": 4.1117, "step": 6806 }, { "epoch": 0.06807, "grad_norm": 0.7541059465600338, "learning_rate": 0.003, "loss": 4.1153, "step": 6807 }, { "epoch": 0.06808, "grad_norm": 0.6793248525851846, "learning_rate": 0.003, "loss": 4.1179, "step": 6808 }, { "epoch": 0.06809, "grad_norm": 0.6873438348421407, "learning_rate": 0.003, "loss": 4.0895, "step": 6809 }, { "epoch": 0.0681, "grad_norm": 0.6049328221467462, "learning_rate": 0.003, "loss": 4.1103, "step": 6810 }, { "epoch": 0.06811, "grad_norm": 0.6187722825054933, "learning_rate": 0.003, "loss": 4.1381, "step": 6811 }, { "epoch": 0.06812, "grad_norm": 0.7495708505892162, "learning_rate": 0.003, "loss": 4.1062, "step": 6812 }, { "epoch": 0.06813, "grad_norm": 0.8952833843553192, "learning_rate": 0.003, "loss": 4.0878, "step": 6813 }, { "epoch": 0.06814, "grad_norm": 1.052298391151623, "learning_rate": 0.003, "loss": 4.1321, "step": 6814 }, { "epoch": 0.06815, "grad_norm": 0.8914337446957912, "learning_rate": 0.003, "loss": 4.1363, "step": 6815 }, { "epoch": 0.06816, "grad_norm": 0.9633140015019734, "learning_rate": 0.003, "loss": 4.1361, "step": 6816 }, { "epoch": 0.06817, "grad_norm": 0.8885850734774637, "learning_rate": 0.003, "loss": 4.1044, "step": 6817 }, { "epoch": 0.06818, "grad_norm": 0.7384023274306349, "learning_rate": 0.003, "loss": 4.1212, "step": 6818 }, { "epoch": 0.06819, "grad_norm": 0.7256819329745252, "learning_rate": 0.003, "loss": 4.1145, "step": 6819 }, { "epoch": 0.0682, "grad_norm": 0.8464960000732561, "learning_rate": 0.003, "loss": 4.1449, "step": 6820 }, { "epoch": 0.06821, "grad_norm": 0.959885711235584, "learning_rate": 0.003, "loss": 4.1381, "step": 6821 }, { "epoch": 0.06822, "grad_norm": 1.0407940708061996, "learning_rate": 0.003, "loss": 4.1013, "step": 6822 }, { "epoch": 0.06823, "grad_norm": 0.9542538781486196, "learning_rate": 0.003, "loss": 4.1156, "step": 6823 }, { "epoch": 0.06824, "grad_norm": 0.8548426620050126, "learning_rate": 0.003, "loss": 4.0933, "step": 6824 }, { "epoch": 0.06825, "grad_norm": 0.7562330738587587, "learning_rate": 0.003, "loss": 4.1082, "step": 6825 }, { "epoch": 0.06826, "grad_norm": 0.6682601342553707, "learning_rate": 0.003, "loss": 4.1002, "step": 6826 }, { "epoch": 0.06827, "grad_norm": 0.5882550844092198, "learning_rate": 0.003, "loss": 4.087, "step": 6827 }, { "epoch": 0.06828, "grad_norm": 0.7012417787965474, "learning_rate": 0.003, "loss": 4.1137, "step": 6828 }, { "epoch": 0.06829, "grad_norm": 0.8327606673237082, "learning_rate": 0.003, "loss": 4.1202, "step": 6829 }, { "epoch": 0.0683, "grad_norm": 0.8528489072611283, "learning_rate": 0.003, "loss": 4.1112, "step": 6830 }, { "epoch": 0.06831, "grad_norm": 0.7377079020858234, "learning_rate": 0.003, "loss": 4.1327, "step": 6831 }, { "epoch": 0.06832, "grad_norm": 0.6999485550253548, "learning_rate": 0.003, "loss": 4.0981, "step": 6832 }, { "epoch": 0.06833, "grad_norm": 0.7712453382887129, "learning_rate": 0.003, "loss": 4.0852, "step": 6833 }, { "epoch": 0.06834, "grad_norm": 0.9741841802598422, "learning_rate": 0.003, "loss": 4.1248, "step": 6834 }, { "epoch": 0.06835, "grad_norm": 1.0151191229361403, "learning_rate": 0.003, "loss": 4.1314, "step": 6835 }, { "epoch": 0.06836, "grad_norm": 0.9928135661127082, "learning_rate": 0.003, "loss": 4.1557, "step": 6836 }, { "epoch": 0.06837, "grad_norm": 1.0176229330263218, "learning_rate": 0.003, "loss": 4.1103, "step": 6837 }, { "epoch": 0.06838, "grad_norm": 0.8062582072461082, "learning_rate": 0.003, "loss": 4.11, "step": 6838 }, { "epoch": 0.06839, "grad_norm": 1.0001169867154327, "learning_rate": 0.003, "loss": 4.1073, "step": 6839 }, { "epoch": 0.0684, "grad_norm": 1.2793247468125468, "learning_rate": 0.003, "loss": 4.1399, "step": 6840 }, { "epoch": 0.06841, "grad_norm": 0.9352650749931306, "learning_rate": 0.003, "loss": 4.1298, "step": 6841 }, { "epoch": 0.06842, "grad_norm": 0.873691761023133, "learning_rate": 0.003, "loss": 4.1281, "step": 6842 }, { "epoch": 0.06843, "grad_norm": 0.943397035481235, "learning_rate": 0.003, "loss": 4.1205, "step": 6843 }, { "epoch": 0.06844, "grad_norm": 1.2419554124329863, "learning_rate": 0.003, "loss": 4.1288, "step": 6844 }, { "epoch": 0.06845, "grad_norm": 0.7627006179431822, "learning_rate": 0.003, "loss": 4.1313, "step": 6845 }, { "epoch": 0.06846, "grad_norm": 0.7761262500841255, "learning_rate": 0.003, "loss": 4.1212, "step": 6846 }, { "epoch": 0.06847, "grad_norm": 0.9754798916992613, "learning_rate": 0.003, "loss": 4.1099, "step": 6847 }, { "epoch": 0.06848, "grad_norm": 1.2952128685404893, "learning_rate": 0.003, "loss": 4.1332, "step": 6848 }, { "epoch": 0.06849, "grad_norm": 0.8183956076455051, "learning_rate": 0.003, "loss": 4.1125, "step": 6849 }, { "epoch": 0.0685, "grad_norm": 0.7298588998388881, "learning_rate": 0.003, "loss": 4.1113, "step": 6850 }, { "epoch": 0.06851, "grad_norm": 0.7720044301801032, "learning_rate": 0.003, "loss": 4.0917, "step": 6851 }, { "epoch": 0.06852, "grad_norm": 0.9117508720265394, "learning_rate": 0.003, "loss": 4.104, "step": 6852 }, { "epoch": 0.06853, "grad_norm": 1.0533798726154795, "learning_rate": 0.003, "loss": 4.1073, "step": 6853 }, { "epoch": 0.06854, "grad_norm": 1.027625478858702, "learning_rate": 0.003, "loss": 4.1418, "step": 6854 }, { "epoch": 0.06855, "grad_norm": 0.9135813747015725, "learning_rate": 0.003, "loss": 4.1489, "step": 6855 }, { "epoch": 0.06856, "grad_norm": 0.9132419414958513, "learning_rate": 0.003, "loss": 4.1486, "step": 6856 }, { "epoch": 0.06857, "grad_norm": 0.9212345733341637, "learning_rate": 0.003, "loss": 4.1283, "step": 6857 }, { "epoch": 0.06858, "grad_norm": 0.8783800930563433, "learning_rate": 0.003, "loss": 4.0902, "step": 6858 }, { "epoch": 0.06859, "grad_norm": 0.6985668013167255, "learning_rate": 0.003, "loss": 4.0979, "step": 6859 }, { "epoch": 0.0686, "grad_norm": 0.6082758464053263, "learning_rate": 0.003, "loss": 4.1047, "step": 6860 }, { "epoch": 0.06861, "grad_norm": 0.5184883931489526, "learning_rate": 0.003, "loss": 4.0956, "step": 6861 }, { "epoch": 0.06862, "grad_norm": 0.5465866791407239, "learning_rate": 0.003, "loss": 4.0935, "step": 6862 }, { "epoch": 0.06863, "grad_norm": 0.6294940342821269, "learning_rate": 0.003, "loss": 4.1001, "step": 6863 }, { "epoch": 0.06864, "grad_norm": 0.819027721570075, "learning_rate": 0.003, "loss": 4.1144, "step": 6864 }, { "epoch": 0.06865, "grad_norm": 0.9537722441632093, "learning_rate": 0.003, "loss": 4.1017, "step": 6865 }, { "epoch": 0.06866, "grad_norm": 0.9579871191173921, "learning_rate": 0.003, "loss": 4.1245, "step": 6866 }, { "epoch": 0.06867, "grad_norm": 0.7874535648082873, "learning_rate": 0.003, "loss": 4.132, "step": 6867 }, { "epoch": 0.06868, "grad_norm": 0.7207326410237167, "learning_rate": 0.003, "loss": 4.0927, "step": 6868 }, { "epoch": 0.06869, "grad_norm": 0.8921760331303482, "learning_rate": 0.003, "loss": 4.1235, "step": 6869 }, { "epoch": 0.0687, "grad_norm": 1.0524247874039665, "learning_rate": 0.003, "loss": 4.0696, "step": 6870 }, { "epoch": 0.06871, "grad_norm": 0.9959937136708353, "learning_rate": 0.003, "loss": 4.0958, "step": 6871 }, { "epoch": 0.06872, "grad_norm": 0.7683343036479461, "learning_rate": 0.003, "loss": 4.093, "step": 6872 }, { "epoch": 0.06873, "grad_norm": 0.6815142284894331, "learning_rate": 0.003, "loss": 4.1016, "step": 6873 }, { "epoch": 0.06874, "grad_norm": 0.681503918141428, "learning_rate": 0.003, "loss": 4.1157, "step": 6874 }, { "epoch": 0.06875, "grad_norm": 0.671319808930849, "learning_rate": 0.003, "loss": 4.1171, "step": 6875 }, { "epoch": 0.06876, "grad_norm": 0.6432032608458409, "learning_rate": 0.003, "loss": 4.1207, "step": 6876 }, { "epoch": 0.06877, "grad_norm": 0.681246314917716, "learning_rate": 0.003, "loss": 4.0902, "step": 6877 }, { "epoch": 0.06878, "grad_norm": 0.6862800830361652, "learning_rate": 0.003, "loss": 4.0826, "step": 6878 }, { "epoch": 0.06879, "grad_norm": 0.6785172447705301, "learning_rate": 0.003, "loss": 4.0935, "step": 6879 }, { "epoch": 0.0688, "grad_norm": 0.6523592214155444, "learning_rate": 0.003, "loss": 4.1044, "step": 6880 }, { "epoch": 0.06881, "grad_norm": 0.7257213543926093, "learning_rate": 0.003, "loss": 4.0942, "step": 6881 }, { "epoch": 0.06882, "grad_norm": 0.8060377847691599, "learning_rate": 0.003, "loss": 4.1028, "step": 6882 }, { "epoch": 0.06883, "grad_norm": 0.792778596117736, "learning_rate": 0.003, "loss": 4.1251, "step": 6883 }, { "epoch": 0.06884, "grad_norm": 0.6894553250899708, "learning_rate": 0.003, "loss": 4.0897, "step": 6884 }, { "epoch": 0.06885, "grad_norm": 0.7168844072539149, "learning_rate": 0.003, "loss": 4.1098, "step": 6885 }, { "epoch": 0.06886, "grad_norm": 0.7480318972493727, "learning_rate": 0.003, "loss": 4.0949, "step": 6886 }, { "epoch": 0.06887, "grad_norm": 0.7617857801277111, "learning_rate": 0.003, "loss": 4.1175, "step": 6887 }, { "epoch": 0.06888, "grad_norm": 0.861679021778327, "learning_rate": 0.003, "loss": 4.0962, "step": 6888 }, { "epoch": 0.06889, "grad_norm": 0.8718407312295603, "learning_rate": 0.003, "loss": 4.1072, "step": 6889 }, { "epoch": 0.0689, "grad_norm": 1.0130605630972407, "learning_rate": 0.003, "loss": 4.0909, "step": 6890 }, { "epoch": 0.06891, "grad_norm": 1.3688291187731094, "learning_rate": 0.003, "loss": 4.1356, "step": 6891 }, { "epoch": 0.06892, "grad_norm": 0.7799161275909664, "learning_rate": 0.003, "loss": 4.1038, "step": 6892 }, { "epoch": 0.06893, "grad_norm": 0.7045235058505965, "learning_rate": 0.003, "loss": 4.1136, "step": 6893 }, { "epoch": 0.06894, "grad_norm": 0.6684728435369993, "learning_rate": 0.003, "loss": 4.0971, "step": 6894 }, { "epoch": 0.06895, "grad_norm": 0.6945126017991131, "learning_rate": 0.003, "loss": 4.0937, "step": 6895 }, { "epoch": 0.06896, "grad_norm": 0.7482855504159163, "learning_rate": 0.003, "loss": 4.0913, "step": 6896 }, { "epoch": 0.06897, "grad_norm": 0.8746143480093368, "learning_rate": 0.003, "loss": 4.1417, "step": 6897 }, { "epoch": 0.06898, "grad_norm": 0.8993489919013548, "learning_rate": 0.003, "loss": 4.0661, "step": 6898 }, { "epoch": 0.06899, "grad_norm": 1.0857843804381477, "learning_rate": 0.003, "loss": 4.1538, "step": 6899 }, { "epoch": 0.069, "grad_norm": 1.0648749747194601, "learning_rate": 0.003, "loss": 4.1172, "step": 6900 }, { "epoch": 0.06901, "grad_norm": 0.9006029334239385, "learning_rate": 0.003, "loss": 4.0895, "step": 6901 }, { "epoch": 0.06902, "grad_norm": 0.8416068012359819, "learning_rate": 0.003, "loss": 4.1022, "step": 6902 }, { "epoch": 0.06903, "grad_norm": 1.006337020622227, "learning_rate": 0.003, "loss": 4.1141, "step": 6903 }, { "epoch": 0.06904, "grad_norm": 1.045403521519028, "learning_rate": 0.003, "loss": 4.1181, "step": 6904 }, { "epoch": 0.06905, "grad_norm": 1.0035690205519932, "learning_rate": 0.003, "loss": 4.1433, "step": 6905 }, { "epoch": 0.06906, "grad_norm": 1.0866394477802588, "learning_rate": 0.003, "loss": 4.0927, "step": 6906 }, { "epoch": 0.06907, "grad_norm": 0.9795306254573748, "learning_rate": 0.003, "loss": 4.1113, "step": 6907 }, { "epoch": 0.06908, "grad_norm": 0.921664480593037, "learning_rate": 0.003, "loss": 4.0912, "step": 6908 }, { "epoch": 0.06909, "grad_norm": 0.7625577740017703, "learning_rate": 0.003, "loss": 4.1224, "step": 6909 }, { "epoch": 0.0691, "grad_norm": 0.7611464664895455, "learning_rate": 0.003, "loss": 4.09, "step": 6910 }, { "epoch": 0.06911, "grad_norm": 0.7481854469654662, "learning_rate": 0.003, "loss": 4.1344, "step": 6911 }, { "epoch": 0.06912, "grad_norm": 0.9007946005791158, "learning_rate": 0.003, "loss": 4.1152, "step": 6912 }, { "epoch": 0.06913, "grad_norm": 1.0181585830069129, "learning_rate": 0.003, "loss": 4.0909, "step": 6913 }, { "epoch": 0.06914, "grad_norm": 1.1870130455108574, "learning_rate": 0.003, "loss": 4.1449, "step": 6914 }, { "epoch": 0.06915, "grad_norm": 0.9236319005490047, "learning_rate": 0.003, "loss": 4.1477, "step": 6915 }, { "epoch": 0.06916, "grad_norm": 0.8302058218315981, "learning_rate": 0.003, "loss": 4.1137, "step": 6916 }, { "epoch": 0.06917, "grad_norm": 0.8449547905827482, "learning_rate": 0.003, "loss": 4.0982, "step": 6917 }, { "epoch": 0.06918, "grad_norm": 1.0042057313137678, "learning_rate": 0.003, "loss": 4.1405, "step": 6918 }, { "epoch": 0.06919, "grad_norm": 1.0948578415550971, "learning_rate": 0.003, "loss": 4.1374, "step": 6919 }, { "epoch": 0.0692, "grad_norm": 0.8651976841261361, "learning_rate": 0.003, "loss": 4.1192, "step": 6920 }, { "epoch": 0.06921, "grad_norm": 0.843906688583314, "learning_rate": 0.003, "loss": 4.1275, "step": 6921 }, { "epoch": 0.06922, "grad_norm": 0.7494458953280497, "learning_rate": 0.003, "loss": 4.1176, "step": 6922 }, { "epoch": 0.06923, "grad_norm": 0.6493697075436607, "learning_rate": 0.003, "loss": 4.0968, "step": 6923 }, { "epoch": 0.06924, "grad_norm": 0.5960115740450397, "learning_rate": 0.003, "loss": 4.1368, "step": 6924 }, { "epoch": 0.06925, "grad_norm": 0.5439383509954068, "learning_rate": 0.003, "loss": 4.1275, "step": 6925 }, { "epoch": 0.06926, "grad_norm": 0.5566009950142305, "learning_rate": 0.003, "loss": 4.1205, "step": 6926 }, { "epoch": 0.06927, "grad_norm": 0.5929207701665649, "learning_rate": 0.003, "loss": 4.1134, "step": 6927 }, { "epoch": 0.06928, "grad_norm": 0.6959312150674015, "learning_rate": 0.003, "loss": 4.0911, "step": 6928 }, { "epoch": 0.06929, "grad_norm": 0.9859706292355523, "learning_rate": 0.003, "loss": 4.1045, "step": 6929 }, { "epoch": 0.0693, "grad_norm": 1.2296463232455344, "learning_rate": 0.003, "loss": 4.1608, "step": 6930 }, { "epoch": 0.06931, "grad_norm": 0.610382372425946, "learning_rate": 0.003, "loss": 4.1152, "step": 6931 }, { "epoch": 0.06932, "grad_norm": 0.6524546502395882, "learning_rate": 0.003, "loss": 4.0908, "step": 6932 }, { "epoch": 0.06933, "grad_norm": 0.7923321668003368, "learning_rate": 0.003, "loss": 4.1315, "step": 6933 }, { "epoch": 0.06934, "grad_norm": 0.824582431481253, "learning_rate": 0.003, "loss": 4.0968, "step": 6934 }, { "epoch": 0.06935, "grad_norm": 0.8727764667568022, "learning_rate": 0.003, "loss": 4.1197, "step": 6935 }, { "epoch": 0.06936, "grad_norm": 0.8326712177028426, "learning_rate": 0.003, "loss": 4.0741, "step": 6936 }, { "epoch": 0.06937, "grad_norm": 0.6666993557326405, "learning_rate": 0.003, "loss": 4.1008, "step": 6937 }, { "epoch": 0.06938, "grad_norm": 0.7059003515766912, "learning_rate": 0.003, "loss": 4.0762, "step": 6938 }, { "epoch": 0.06939, "grad_norm": 0.7910834610590093, "learning_rate": 0.003, "loss": 4.1098, "step": 6939 }, { "epoch": 0.0694, "grad_norm": 0.755416937286377, "learning_rate": 0.003, "loss": 4.0981, "step": 6940 }, { "epoch": 0.06941, "grad_norm": 0.6585307567475693, "learning_rate": 0.003, "loss": 4.1137, "step": 6941 }, { "epoch": 0.06942, "grad_norm": 0.686786611772453, "learning_rate": 0.003, "loss": 4.0949, "step": 6942 }, { "epoch": 0.06943, "grad_norm": 0.6748173855765259, "learning_rate": 0.003, "loss": 4.1205, "step": 6943 }, { "epoch": 0.06944, "grad_norm": 0.8309412479867045, "learning_rate": 0.003, "loss": 4.1066, "step": 6944 }, { "epoch": 0.06945, "grad_norm": 0.8694006981278489, "learning_rate": 0.003, "loss": 4.1363, "step": 6945 }, { "epoch": 0.06946, "grad_norm": 0.9735883766446164, "learning_rate": 0.003, "loss": 4.1176, "step": 6946 }, { "epoch": 0.06947, "grad_norm": 0.9569405363329243, "learning_rate": 0.003, "loss": 4.1025, "step": 6947 }, { "epoch": 0.06948, "grad_norm": 0.9911481171151727, "learning_rate": 0.003, "loss": 4.1157, "step": 6948 }, { "epoch": 0.06949, "grad_norm": 1.1434234191553132, "learning_rate": 0.003, "loss": 4.1296, "step": 6949 }, { "epoch": 0.0695, "grad_norm": 0.8225443853899382, "learning_rate": 0.003, "loss": 4.1008, "step": 6950 }, { "epoch": 0.06951, "grad_norm": 0.7771139496183902, "learning_rate": 0.003, "loss": 4.1334, "step": 6951 }, { "epoch": 0.06952, "grad_norm": 0.8827808082345756, "learning_rate": 0.003, "loss": 4.1382, "step": 6952 }, { "epoch": 0.06953, "grad_norm": 1.0344317147347284, "learning_rate": 0.003, "loss": 4.1402, "step": 6953 }, { "epoch": 0.06954, "grad_norm": 0.9087885968248024, "learning_rate": 0.003, "loss": 4.0844, "step": 6954 }, { "epoch": 0.06955, "grad_norm": 0.8948872903159871, "learning_rate": 0.003, "loss": 4.129, "step": 6955 }, { "epoch": 0.06956, "grad_norm": 0.8801466457771367, "learning_rate": 0.003, "loss": 4.133, "step": 6956 }, { "epoch": 0.06957, "grad_norm": 0.7407954654118085, "learning_rate": 0.003, "loss": 4.1092, "step": 6957 }, { "epoch": 0.06958, "grad_norm": 0.6110627923040244, "learning_rate": 0.003, "loss": 4.122, "step": 6958 }, { "epoch": 0.06959, "grad_norm": 0.640461355001315, "learning_rate": 0.003, "loss": 4.1159, "step": 6959 }, { "epoch": 0.0696, "grad_norm": 0.7327706567363643, "learning_rate": 0.003, "loss": 4.1274, "step": 6960 }, { "epoch": 0.06961, "grad_norm": 0.8343628226365843, "learning_rate": 0.003, "loss": 4.1274, "step": 6961 }, { "epoch": 0.06962, "grad_norm": 0.8816790067550263, "learning_rate": 0.003, "loss": 4.1198, "step": 6962 }, { "epoch": 0.06963, "grad_norm": 1.029137591999145, "learning_rate": 0.003, "loss": 4.1277, "step": 6963 }, { "epoch": 0.06964, "grad_norm": 1.083546288789342, "learning_rate": 0.003, "loss": 4.1255, "step": 6964 }, { "epoch": 0.06965, "grad_norm": 1.0972587017397695, "learning_rate": 0.003, "loss": 4.1103, "step": 6965 }, { "epoch": 0.06966, "grad_norm": 0.8505331999829698, "learning_rate": 0.003, "loss": 4.1279, "step": 6966 }, { "epoch": 0.06967, "grad_norm": 0.6926913786836009, "learning_rate": 0.003, "loss": 4.1058, "step": 6967 }, { "epoch": 0.06968, "grad_norm": 0.7470348630991248, "learning_rate": 0.003, "loss": 4.1134, "step": 6968 }, { "epoch": 0.06969, "grad_norm": 0.7260002360079123, "learning_rate": 0.003, "loss": 4.1279, "step": 6969 }, { "epoch": 0.0697, "grad_norm": 0.6766317452834478, "learning_rate": 0.003, "loss": 4.095, "step": 6970 }, { "epoch": 0.06971, "grad_norm": 0.6911223172142554, "learning_rate": 0.003, "loss": 4.1041, "step": 6971 }, { "epoch": 0.06972, "grad_norm": 0.6699177971563128, "learning_rate": 0.003, "loss": 4.0665, "step": 6972 }, { "epoch": 0.06973, "grad_norm": 0.694687314086879, "learning_rate": 0.003, "loss": 4.1132, "step": 6973 }, { "epoch": 0.06974, "grad_norm": 0.9488840337040796, "learning_rate": 0.003, "loss": 4.1058, "step": 6974 }, { "epoch": 0.06975, "grad_norm": 1.2258549533377359, "learning_rate": 0.003, "loss": 4.1239, "step": 6975 }, { "epoch": 0.06976, "grad_norm": 0.7740323563047606, "learning_rate": 0.003, "loss": 4.0806, "step": 6976 }, { "epoch": 0.06977, "grad_norm": 0.575041977095538, "learning_rate": 0.003, "loss": 4.1458, "step": 6977 }, { "epoch": 0.06978, "grad_norm": 0.728998390696451, "learning_rate": 0.003, "loss": 4.0815, "step": 6978 }, { "epoch": 0.06979, "grad_norm": 1.013780311861753, "learning_rate": 0.003, "loss": 4.1357, "step": 6979 }, { "epoch": 0.0698, "grad_norm": 1.1763620482776194, "learning_rate": 0.003, "loss": 4.1272, "step": 6980 }, { "epoch": 0.06981, "grad_norm": 0.815900592117822, "learning_rate": 0.003, "loss": 4.0939, "step": 6981 }, { "epoch": 0.06982, "grad_norm": 0.7122151392371082, "learning_rate": 0.003, "loss": 4.1027, "step": 6982 }, { "epoch": 0.06983, "grad_norm": 0.6205206572999914, "learning_rate": 0.003, "loss": 4.0848, "step": 6983 }, { "epoch": 0.06984, "grad_norm": 0.6501948602208925, "learning_rate": 0.003, "loss": 4.0876, "step": 6984 }, { "epoch": 0.06985, "grad_norm": 0.6936542630188876, "learning_rate": 0.003, "loss": 4.1203, "step": 6985 }, { "epoch": 0.06986, "grad_norm": 0.7630402930183048, "learning_rate": 0.003, "loss": 4.1136, "step": 6986 }, { "epoch": 0.06987, "grad_norm": 0.8307718661807078, "learning_rate": 0.003, "loss": 4.1145, "step": 6987 }, { "epoch": 0.06988, "grad_norm": 0.8259567088608353, "learning_rate": 0.003, "loss": 4.0991, "step": 6988 }, { "epoch": 0.06989, "grad_norm": 0.8383919214928979, "learning_rate": 0.003, "loss": 4.0891, "step": 6989 }, { "epoch": 0.0699, "grad_norm": 0.8394764313790253, "learning_rate": 0.003, "loss": 4.0876, "step": 6990 }, { "epoch": 0.06991, "grad_norm": 0.7966606342946648, "learning_rate": 0.003, "loss": 4.1182, "step": 6991 }, { "epoch": 0.06992, "grad_norm": 0.6596406930588895, "learning_rate": 0.003, "loss": 4.0823, "step": 6992 }, { "epoch": 0.06993, "grad_norm": 0.6295320798498751, "learning_rate": 0.003, "loss": 4.0855, "step": 6993 }, { "epoch": 0.06994, "grad_norm": 0.6843612644051112, "learning_rate": 0.003, "loss": 4.0919, "step": 6994 }, { "epoch": 0.06995, "grad_norm": 0.7889219514350806, "learning_rate": 0.003, "loss": 4.1197, "step": 6995 }, { "epoch": 0.06996, "grad_norm": 0.8402572785911753, "learning_rate": 0.003, "loss": 4.1067, "step": 6996 }, { "epoch": 0.06997, "grad_norm": 0.7656897240238402, "learning_rate": 0.003, "loss": 4.0807, "step": 6997 }, { "epoch": 0.06998, "grad_norm": 0.8838339815381696, "learning_rate": 0.003, "loss": 4.1032, "step": 6998 }, { "epoch": 0.06999, "grad_norm": 1.0521884576052078, "learning_rate": 0.003, "loss": 4.0955, "step": 6999 }, { "epoch": 0.07, "grad_norm": 1.3428947555527946, "learning_rate": 0.003, "loss": 4.1077, "step": 7000 }, { "epoch": 0.07001, "grad_norm": 0.7773020396247668, "learning_rate": 0.003, "loss": 4.152, "step": 7001 }, { "epoch": 0.07002, "grad_norm": 0.6360859962376562, "learning_rate": 0.003, "loss": 4.0991, "step": 7002 }, { "epoch": 0.07003, "grad_norm": 0.71049436819903, "learning_rate": 0.003, "loss": 4.0899, "step": 7003 }, { "epoch": 0.07004, "grad_norm": 0.7207589244507122, "learning_rate": 0.003, "loss": 4.0773, "step": 7004 }, { "epoch": 0.07005, "grad_norm": 0.8029274080334843, "learning_rate": 0.003, "loss": 4.1037, "step": 7005 }, { "epoch": 0.07006, "grad_norm": 0.9621397059134545, "learning_rate": 0.003, "loss": 4.1107, "step": 7006 }, { "epoch": 0.07007, "grad_norm": 1.0548395954148817, "learning_rate": 0.003, "loss": 4.1166, "step": 7007 }, { "epoch": 0.07008, "grad_norm": 0.9292949868098707, "learning_rate": 0.003, "loss": 4.1088, "step": 7008 }, { "epoch": 0.07009, "grad_norm": 1.1487800774682821, "learning_rate": 0.003, "loss": 4.1326, "step": 7009 }, { "epoch": 0.0701, "grad_norm": 1.0137599916747433, "learning_rate": 0.003, "loss": 4.1066, "step": 7010 }, { "epoch": 0.07011, "grad_norm": 0.9682428887494761, "learning_rate": 0.003, "loss": 4.1013, "step": 7011 }, { "epoch": 0.07012, "grad_norm": 0.8914447047092996, "learning_rate": 0.003, "loss": 4.1336, "step": 7012 }, { "epoch": 0.07013, "grad_norm": 0.9039463647228102, "learning_rate": 0.003, "loss": 4.1038, "step": 7013 }, { "epoch": 0.07014, "grad_norm": 1.134635997819684, "learning_rate": 0.003, "loss": 4.1003, "step": 7014 }, { "epoch": 0.07015, "grad_norm": 1.031076929207217, "learning_rate": 0.003, "loss": 4.1057, "step": 7015 }, { "epoch": 0.07016, "grad_norm": 1.0116310847444454, "learning_rate": 0.003, "loss": 4.1117, "step": 7016 }, { "epoch": 0.07017, "grad_norm": 1.005782143604759, "learning_rate": 0.003, "loss": 4.0986, "step": 7017 }, { "epoch": 0.07018, "grad_norm": 0.9578767335403211, "learning_rate": 0.003, "loss": 4.1274, "step": 7018 }, { "epoch": 0.07019, "grad_norm": 0.8935381942583269, "learning_rate": 0.003, "loss": 4.1152, "step": 7019 }, { "epoch": 0.0702, "grad_norm": 0.744623485765837, "learning_rate": 0.003, "loss": 4.1031, "step": 7020 }, { "epoch": 0.07021, "grad_norm": 0.801305747893465, "learning_rate": 0.003, "loss": 4.121, "step": 7021 }, { "epoch": 0.07022, "grad_norm": 0.7083760005593353, "learning_rate": 0.003, "loss": 4.1213, "step": 7022 }, { "epoch": 0.07023, "grad_norm": 0.621039433188525, "learning_rate": 0.003, "loss": 4.1125, "step": 7023 }, { "epoch": 0.07024, "grad_norm": 0.5806951914320432, "learning_rate": 0.003, "loss": 4.0904, "step": 7024 }, { "epoch": 0.07025, "grad_norm": 0.6152547374263034, "learning_rate": 0.003, "loss": 4.1165, "step": 7025 }, { "epoch": 0.07026, "grad_norm": 0.6694178964744939, "learning_rate": 0.003, "loss": 4.0887, "step": 7026 }, { "epoch": 0.07027, "grad_norm": 0.9841630681827739, "learning_rate": 0.003, "loss": 4.1042, "step": 7027 }, { "epoch": 0.07028, "grad_norm": 1.200254843619916, "learning_rate": 0.003, "loss": 4.115, "step": 7028 }, { "epoch": 0.07029, "grad_norm": 0.7174951451462476, "learning_rate": 0.003, "loss": 4.0925, "step": 7029 }, { "epoch": 0.0703, "grad_norm": 0.6728970196461604, "learning_rate": 0.003, "loss": 4.1239, "step": 7030 }, { "epoch": 0.07031, "grad_norm": 0.7070157774496603, "learning_rate": 0.003, "loss": 4.0802, "step": 7031 }, { "epoch": 0.07032, "grad_norm": 0.7387898161770746, "learning_rate": 0.003, "loss": 4.1128, "step": 7032 }, { "epoch": 0.07033, "grad_norm": 0.8901816616568644, "learning_rate": 0.003, "loss": 4.1179, "step": 7033 }, { "epoch": 0.07034, "grad_norm": 1.0284878793130177, "learning_rate": 0.003, "loss": 4.1098, "step": 7034 }, { "epoch": 0.07035, "grad_norm": 1.058686540675132, "learning_rate": 0.003, "loss": 4.1186, "step": 7035 }, { "epoch": 0.07036, "grad_norm": 0.9359391304214265, "learning_rate": 0.003, "loss": 4.1052, "step": 7036 }, { "epoch": 0.07037, "grad_norm": 0.9481158673322754, "learning_rate": 0.003, "loss": 4.0974, "step": 7037 }, { "epoch": 0.07038, "grad_norm": 0.8334487757118696, "learning_rate": 0.003, "loss": 4.1106, "step": 7038 }, { "epoch": 0.07039, "grad_norm": 0.6326431662983385, "learning_rate": 0.003, "loss": 4.1031, "step": 7039 }, { "epoch": 0.0704, "grad_norm": 0.702514824071445, "learning_rate": 0.003, "loss": 4.1267, "step": 7040 }, { "epoch": 0.07041, "grad_norm": 0.7400349502050564, "learning_rate": 0.003, "loss": 4.1032, "step": 7041 }, { "epoch": 0.07042, "grad_norm": 0.7099648121514438, "learning_rate": 0.003, "loss": 4.0884, "step": 7042 }, { "epoch": 0.07043, "grad_norm": 0.6483290540107833, "learning_rate": 0.003, "loss": 4.121, "step": 7043 }, { "epoch": 0.07044, "grad_norm": 0.6620145629117125, "learning_rate": 0.003, "loss": 4.1202, "step": 7044 }, { "epoch": 0.07045, "grad_norm": 0.5978749302357278, "learning_rate": 0.003, "loss": 4.1042, "step": 7045 }, { "epoch": 0.07046, "grad_norm": 0.5888111174272556, "learning_rate": 0.003, "loss": 4.1089, "step": 7046 }, { "epoch": 0.07047, "grad_norm": 0.5784250123506132, "learning_rate": 0.003, "loss": 4.1062, "step": 7047 }, { "epoch": 0.07048, "grad_norm": 0.6072433980268588, "learning_rate": 0.003, "loss": 4.0959, "step": 7048 }, { "epoch": 0.07049, "grad_norm": 0.7094471050898912, "learning_rate": 0.003, "loss": 4.097, "step": 7049 }, { "epoch": 0.0705, "grad_norm": 0.953711232001365, "learning_rate": 0.003, "loss": 4.1206, "step": 7050 }, { "epoch": 0.07051, "grad_norm": 1.4359270563806132, "learning_rate": 0.003, "loss": 4.1404, "step": 7051 }, { "epoch": 0.07052, "grad_norm": 0.6731996114775755, "learning_rate": 0.003, "loss": 4.0814, "step": 7052 }, { "epoch": 0.07053, "grad_norm": 0.8452128279853137, "learning_rate": 0.003, "loss": 4.0998, "step": 7053 }, { "epoch": 0.07054, "grad_norm": 0.9361316507438332, "learning_rate": 0.003, "loss": 4.1027, "step": 7054 }, { "epoch": 0.07055, "grad_norm": 1.0791674499784152, "learning_rate": 0.003, "loss": 4.1101, "step": 7055 }, { "epoch": 0.07056, "grad_norm": 0.9840050657142604, "learning_rate": 0.003, "loss": 4.0995, "step": 7056 }, { "epoch": 0.07057, "grad_norm": 0.7477218913081056, "learning_rate": 0.003, "loss": 4.118, "step": 7057 }, { "epoch": 0.07058, "grad_norm": 0.6446497473373758, "learning_rate": 0.003, "loss": 4.1011, "step": 7058 }, { "epoch": 0.07059, "grad_norm": 0.7865830553034692, "learning_rate": 0.003, "loss": 4.1074, "step": 7059 }, { "epoch": 0.0706, "grad_norm": 1.0013324953332357, "learning_rate": 0.003, "loss": 4.1092, "step": 7060 }, { "epoch": 0.07061, "grad_norm": 0.9809126840231154, "learning_rate": 0.003, "loss": 4.1046, "step": 7061 }, { "epoch": 0.07062, "grad_norm": 1.0431446716134858, "learning_rate": 0.003, "loss": 4.1436, "step": 7062 }, { "epoch": 0.07063, "grad_norm": 0.9216298810073756, "learning_rate": 0.003, "loss": 4.1163, "step": 7063 }, { "epoch": 0.07064, "grad_norm": 0.9309447457188175, "learning_rate": 0.003, "loss": 4.1322, "step": 7064 }, { "epoch": 0.07065, "grad_norm": 0.9478643723606038, "learning_rate": 0.003, "loss": 4.1255, "step": 7065 }, { "epoch": 0.07066, "grad_norm": 0.8659841976692164, "learning_rate": 0.003, "loss": 4.1274, "step": 7066 }, { "epoch": 0.07067, "grad_norm": 0.820962997351535, "learning_rate": 0.003, "loss": 4.1571, "step": 7067 }, { "epoch": 0.07068, "grad_norm": 0.8086119331768552, "learning_rate": 0.003, "loss": 4.0912, "step": 7068 }, { "epoch": 0.07069, "grad_norm": 0.7230672452463993, "learning_rate": 0.003, "loss": 4.0936, "step": 7069 }, { "epoch": 0.0707, "grad_norm": 0.6999038291572481, "learning_rate": 0.003, "loss": 4.1175, "step": 7070 }, { "epoch": 0.07071, "grad_norm": 0.646981573011097, "learning_rate": 0.003, "loss": 4.0935, "step": 7071 }, { "epoch": 0.07072, "grad_norm": 0.6484005206388396, "learning_rate": 0.003, "loss": 4.0994, "step": 7072 }, { "epoch": 0.07073, "grad_norm": 0.650657762847781, "learning_rate": 0.003, "loss": 4.1142, "step": 7073 }, { "epoch": 0.07074, "grad_norm": 0.6775125603608778, "learning_rate": 0.003, "loss": 4.0996, "step": 7074 }, { "epoch": 0.07075, "grad_norm": 0.6083872877550612, "learning_rate": 0.003, "loss": 4.1356, "step": 7075 }, { "epoch": 0.07076, "grad_norm": 0.6452208544105434, "learning_rate": 0.003, "loss": 4.0825, "step": 7076 }, { "epoch": 0.07077, "grad_norm": 0.6649673970366402, "learning_rate": 0.003, "loss": 4.0958, "step": 7077 }, { "epoch": 0.07078, "grad_norm": 0.6783088752414806, "learning_rate": 0.003, "loss": 4.1197, "step": 7078 }, { "epoch": 0.07079, "grad_norm": 0.7710592718558568, "learning_rate": 0.003, "loss": 4.1241, "step": 7079 }, { "epoch": 0.0708, "grad_norm": 0.958668145233532, "learning_rate": 0.003, "loss": 4.071, "step": 7080 }, { "epoch": 0.07081, "grad_norm": 1.2533157696452137, "learning_rate": 0.003, "loss": 4.1191, "step": 7081 }, { "epoch": 0.07082, "grad_norm": 0.7341633652839094, "learning_rate": 0.003, "loss": 4.1009, "step": 7082 }, { "epoch": 0.07083, "grad_norm": 0.7125445429868206, "learning_rate": 0.003, "loss": 4.0874, "step": 7083 }, { "epoch": 0.07084, "grad_norm": 0.8323202978555945, "learning_rate": 0.003, "loss": 4.1028, "step": 7084 }, { "epoch": 0.07085, "grad_norm": 0.8775075656235146, "learning_rate": 0.003, "loss": 4.1231, "step": 7085 }, { "epoch": 0.07086, "grad_norm": 0.8415506665597984, "learning_rate": 0.003, "loss": 4.1061, "step": 7086 }, { "epoch": 0.07087, "grad_norm": 0.8886268254263243, "learning_rate": 0.003, "loss": 4.1043, "step": 7087 }, { "epoch": 0.07088, "grad_norm": 1.1007254498444334, "learning_rate": 0.003, "loss": 4.083, "step": 7088 }, { "epoch": 0.07089, "grad_norm": 1.077560245637077, "learning_rate": 0.003, "loss": 4.1117, "step": 7089 }, { "epoch": 0.0709, "grad_norm": 1.0164885049934373, "learning_rate": 0.003, "loss": 4.1116, "step": 7090 }, { "epoch": 0.07091, "grad_norm": 0.9408820922271797, "learning_rate": 0.003, "loss": 4.1018, "step": 7091 }, { "epoch": 0.07092, "grad_norm": 0.9903430667983368, "learning_rate": 0.003, "loss": 4.1063, "step": 7092 }, { "epoch": 0.07093, "grad_norm": 1.0927513972609133, "learning_rate": 0.003, "loss": 4.1388, "step": 7093 }, { "epoch": 0.07094, "grad_norm": 0.9057260963699147, "learning_rate": 0.003, "loss": 4.1507, "step": 7094 }, { "epoch": 0.07095, "grad_norm": 0.961594432402731, "learning_rate": 0.003, "loss": 4.1407, "step": 7095 }, { "epoch": 0.07096, "grad_norm": 0.9337093837213043, "learning_rate": 0.003, "loss": 4.1069, "step": 7096 }, { "epoch": 0.07097, "grad_norm": 0.8978322204376042, "learning_rate": 0.003, "loss": 4.1303, "step": 7097 }, { "epoch": 0.07098, "grad_norm": 0.8250215892500493, "learning_rate": 0.003, "loss": 4.1318, "step": 7098 }, { "epoch": 0.07099, "grad_norm": 0.7161931173541564, "learning_rate": 0.003, "loss": 4.1273, "step": 7099 }, { "epoch": 0.071, "grad_norm": 0.7049995268154481, "learning_rate": 0.003, "loss": 4.1194, "step": 7100 }, { "epoch": 0.07101, "grad_norm": 0.7656370792822328, "learning_rate": 0.003, "loss": 4.0995, "step": 7101 }, { "epoch": 0.07102, "grad_norm": 0.7875547696719094, "learning_rate": 0.003, "loss": 4.1158, "step": 7102 }, { "epoch": 0.07103, "grad_norm": 0.8916531396092162, "learning_rate": 0.003, "loss": 4.1292, "step": 7103 }, { "epoch": 0.07104, "grad_norm": 1.0114205653924595, "learning_rate": 0.003, "loss": 4.1284, "step": 7104 }, { "epoch": 0.07105, "grad_norm": 1.1741497513128247, "learning_rate": 0.003, "loss": 4.1241, "step": 7105 }, { "epoch": 0.07106, "grad_norm": 0.7625197875421197, "learning_rate": 0.003, "loss": 4.1261, "step": 7106 }, { "epoch": 0.07107, "grad_norm": 0.6257457801610031, "learning_rate": 0.003, "loss": 4.1208, "step": 7107 }, { "epoch": 0.07108, "grad_norm": 0.722291112408107, "learning_rate": 0.003, "loss": 4.1298, "step": 7108 }, { "epoch": 0.07109, "grad_norm": 0.8199337468043636, "learning_rate": 0.003, "loss": 4.1157, "step": 7109 }, { "epoch": 0.0711, "grad_norm": 0.9809541172196407, "learning_rate": 0.003, "loss": 4.093, "step": 7110 }, { "epoch": 0.07111, "grad_norm": 1.3461787795853564, "learning_rate": 0.003, "loss": 4.1208, "step": 7111 }, { "epoch": 0.07112, "grad_norm": 0.8890390770134524, "learning_rate": 0.003, "loss": 4.1142, "step": 7112 }, { "epoch": 0.07113, "grad_norm": 0.795752170005651, "learning_rate": 0.003, "loss": 4.1472, "step": 7113 }, { "epoch": 0.07114, "grad_norm": 0.7764623741527109, "learning_rate": 0.003, "loss": 4.1218, "step": 7114 }, { "epoch": 0.07115, "grad_norm": 0.7626873101755763, "learning_rate": 0.003, "loss": 4.1063, "step": 7115 }, { "epoch": 0.07116, "grad_norm": 0.709900661004303, "learning_rate": 0.003, "loss": 4.1096, "step": 7116 }, { "epoch": 0.07117, "grad_norm": 0.7447218585864936, "learning_rate": 0.003, "loss": 4.1227, "step": 7117 }, { "epoch": 0.07118, "grad_norm": 0.8145999556594223, "learning_rate": 0.003, "loss": 4.1298, "step": 7118 }, { "epoch": 0.07119, "grad_norm": 0.969296790631849, "learning_rate": 0.003, "loss": 4.1101, "step": 7119 }, { "epoch": 0.0712, "grad_norm": 1.110298419738427, "learning_rate": 0.003, "loss": 4.1131, "step": 7120 }, { "epoch": 0.07121, "grad_norm": 0.7765027521208008, "learning_rate": 0.003, "loss": 4.1076, "step": 7121 }, { "epoch": 0.07122, "grad_norm": 0.6976330840832459, "learning_rate": 0.003, "loss": 4.0984, "step": 7122 }, { "epoch": 0.07123, "grad_norm": 0.7282209002398681, "learning_rate": 0.003, "loss": 4.0927, "step": 7123 }, { "epoch": 0.07124, "grad_norm": 0.8034457748599896, "learning_rate": 0.003, "loss": 4.0898, "step": 7124 }, { "epoch": 0.07125, "grad_norm": 0.9445995479508306, "learning_rate": 0.003, "loss": 4.0992, "step": 7125 }, { "epoch": 0.07126, "grad_norm": 1.1268646555573658, "learning_rate": 0.003, "loss": 4.1371, "step": 7126 }, { "epoch": 0.07127, "grad_norm": 0.8675499183490605, "learning_rate": 0.003, "loss": 4.0989, "step": 7127 }, { "epoch": 0.07128, "grad_norm": 0.7495770001240798, "learning_rate": 0.003, "loss": 4.0839, "step": 7128 }, { "epoch": 0.07129, "grad_norm": 0.7927069334940464, "learning_rate": 0.003, "loss": 4.1188, "step": 7129 }, { "epoch": 0.0713, "grad_norm": 0.8376734219621176, "learning_rate": 0.003, "loss": 4.1083, "step": 7130 }, { "epoch": 0.07131, "grad_norm": 0.9019229212023059, "learning_rate": 0.003, "loss": 4.0929, "step": 7131 }, { "epoch": 0.07132, "grad_norm": 0.958492302536442, "learning_rate": 0.003, "loss": 4.1359, "step": 7132 }, { "epoch": 0.07133, "grad_norm": 0.9703595503667748, "learning_rate": 0.003, "loss": 4.1434, "step": 7133 }, { "epoch": 0.07134, "grad_norm": 1.0020602010967854, "learning_rate": 0.003, "loss": 4.0864, "step": 7134 }, { "epoch": 0.07135, "grad_norm": 0.8801413035960939, "learning_rate": 0.003, "loss": 4.1129, "step": 7135 }, { "epoch": 0.07136, "grad_norm": 0.7990610722753618, "learning_rate": 0.003, "loss": 4.147, "step": 7136 }, { "epoch": 0.07137, "grad_norm": 0.793252959668317, "learning_rate": 0.003, "loss": 4.11, "step": 7137 }, { "epoch": 0.07138, "grad_norm": 0.9362895477284106, "learning_rate": 0.003, "loss": 4.1098, "step": 7138 }, { "epoch": 0.07139, "grad_norm": 1.0323509656605805, "learning_rate": 0.003, "loss": 4.1163, "step": 7139 }, { "epoch": 0.0714, "grad_norm": 0.9495162380496068, "learning_rate": 0.003, "loss": 4.1211, "step": 7140 }, { "epoch": 0.07141, "grad_norm": 0.9575955550409052, "learning_rate": 0.003, "loss": 4.101, "step": 7141 }, { "epoch": 0.07142, "grad_norm": 0.8744238864306674, "learning_rate": 0.003, "loss": 4.0901, "step": 7142 }, { "epoch": 0.07143, "grad_norm": 0.8469591921258507, "learning_rate": 0.003, "loss": 4.0801, "step": 7143 }, { "epoch": 0.07144, "grad_norm": 0.8348336779926869, "learning_rate": 0.003, "loss": 4.0936, "step": 7144 }, { "epoch": 0.07145, "grad_norm": 1.20426435156208, "learning_rate": 0.003, "loss": 4.1252, "step": 7145 }, { "epoch": 0.07146, "grad_norm": 0.9331877874118307, "learning_rate": 0.003, "loss": 4.1185, "step": 7146 }, { "epoch": 0.07147, "grad_norm": 0.7956424973642607, "learning_rate": 0.003, "loss": 4.1237, "step": 7147 }, { "epoch": 0.07148, "grad_norm": 0.6333045591280563, "learning_rate": 0.003, "loss": 4.1239, "step": 7148 }, { "epoch": 0.07149, "grad_norm": 0.6566262136153223, "learning_rate": 0.003, "loss": 4.1273, "step": 7149 }, { "epoch": 0.0715, "grad_norm": 0.7318802572187021, "learning_rate": 0.003, "loss": 4.1087, "step": 7150 }, { "epoch": 0.07151, "grad_norm": 0.7829355969313044, "learning_rate": 0.003, "loss": 4.1096, "step": 7151 }, { "epoch": 0.07152, "grad_norm": 0.8583613260253945, "learning_rate": 0.003, "loss": 4.1282, "step": 7152 }, { "epoch": 0.07153, "grad_norm": 0.92982352203367, "learning_rate": 0.003, "loss": 4.1147, "step": 7153 }, { "epoch": 0.07154, "grad_norm": 0.9644437837333677, "learning_rate": 0.003, "loss": 4.1498, "step": 7154 }, { "epoch": 0.07155, "grad_norm": 0.978358778668515, "learning_rate": 0.003, "loss": 4.1149, "step": 7155 }, { "epoch": 0.07156, "grad_norm": 0.7507926446560859, "learning_rate": 0.003, "loss": 4.1172, "step": 7156 }, { "epoch": 0.07157, "grad_norm": 0.6945396009037457, "learning_rate": 0.003, "loss": 4.1004, "step": 7157 }, { "epoch": 0.07158, "grad_norm": 0.6202573792874552, "learning_rate": 0.003, "loss": 4.1053, "step": 7158 }, { "epoch": 0.07159, "grad_norm": 0.6406745858919619, "learning_rate": 0.003, "loss": 4.1111, "step": 7159 }, { "epoch": 0.0716, "grad_norm": 0.6759721738325275, "learning_rate": 0.003, "loss": 4.0822, "step": 7160 }, { "epoch": 0.07161, "grad_norm": 0.6773350977546571, "learning_rate": 0.003, "loss": 4.1124, "step": 7161 }, { "epoch": 0.07162, "grad_norm": 0.8134936196821616, "learning_rate": 0.003, "loss": 4.1165, "step": 7162 }, { "epoch": 0.07163, "grad_norm": 1.0238124509153523, "learning_rate": 0.003, "loss": 4.1117, "step": 7163 }, { "epoch": 0.07164, "grad_norm": 1.0689438667971007, "learning_rate": 0.003, "loss": 4.0973, "step": 7164 }, { "epoch": 0.07165, "grad_norm": 0.7928160801351044, "learning_rate": 0.003, "loss": 4.0961, "step": 7165 }, { "epoch": 0.07166, "grad_norm": 0.7238005610299342, "learning_rate": 0.003, "loss": 4.0966, "step": 7166 }, { "epoch": 0.07167, "grad_norm": 0.7943353284460141, "learning_rate": 0.003, "loss": 4.1138, "step": 7167 }, { "epoch": 0.07168, "grad_norm": 0.9366138493792299, "learning_rate": 0.003, "loss": 4.1346, "step": 7168 }, { "epoch": 0.07169, "grad_norm": 0.9317917412995604, "learning_rate": 0.003, "loss": 4.1022, "step": 7169 }, { "epoch": 0.0717, "grad_norm": 0.9498234616222482, "learning_rate": 0.003, "loss": 4.0961, "step": 7170 }, { "epoch": 0.07171, "grad_norm": 1.0293570458877537, "learning_rate": 0.003, "loss": 4.1234, "step": 7171 }, { "epoch": 0.07172, "grad_norm": 0.9825020297184559, "learning_rate": 0.003, "loss": 4.1273, "step": 7172 }, { "epoch": 0.07173, "grad_norm": 0.8679097068915133, "learning_rate": 0.003, "loss": 4.1242, "step": 7173 }, { "epoch": 0.07174, "grad_norm": 0.8695176732805833, "learning_rate": 0.003, "loss": 4.0988, "step": 7174 }, { "epoch": 0.07175, "grad_norm": 0.9453745541920484, "learning_rate": 0.003, "loss": 4.1366, "step": 7175 }, { "epoch": 0.07176, "grad_norm": 0.9192625870625667, "learning_rate": 0.003, "loss": 4.1125, "step": 7176 }, { "epoch": 0.07177, "grad_norm": 0.9892316081377227, "learning_rate": 0.003, "loss": 4.1181, "step": 7177 }, { "epoch": 0.07178, "grad_norm": 1.1173320244326939, "learning_rate": 0.003, "loss": 4.0841, "step": 7178 }, { "epoch": 0.07179, "grad_norm": 0.9062788363818038, "learning_rate": 0.003, "loss": 4.1165, "step": 7179 }, { "epoch": 0.0718, "grad_norm": 0.8000050025573026, "learning_rate": 0.003, "loss": 4.1228, "step": 7180 }, { "epoch": 0.07181, "grad_norm": 0.873878962358314, "learning_rate": 0.003, "loss": 4.117, "step": 7181 }, { "epoch": 0.07182, "grad_norm": 0.8940125268309896, "learning_rate": 0.003, "loss": 4.128, "step": 7182 }, { "epoch": 0.07183, "grad_norm": 0.9553838594697793, "learning_rate": 0.003, "loss": 4.1263, "step": 7183 }, { "epoch": 0.07184, "grad_norm": 0.8409034686100991, "learning_rate": 0.003, "loss": 4.1007, "step": 7184 }, { "epoch": 0.07185, "grad_norm": 0.8965763690380733, "learning_rate": 0.003, "loss": 4.1097, "step": 7185 }, { "epoch": 0.07186, "grad_norm": 1.0738828307599333, "learning_rate": 0.003, "loss": 4.1354, "step": 7186 }, { "epoch": 0.07187, "grad_norm": 1.121879584063851, "learning_rate": 0.003, "loss": 4.1261, "step": 7187 }, { "epoch": 0.07188, "grad_norm": 0.7967451613183767, "learning_rate": 0.003, "loss": 4.0828, "step": 7188 }, { "epoch": 0.07189, "grad_norm": 0.8216455172743733, "learning_rate": 0.003, "loss": 4.1332, "step": 7189 }, { "epoch": 0.0719, "grad_norm": 0.8712547518094813, "learning_rate": 0.003, "loss": 4.1318, "step": 7190 }, { "epoch": 0.07191, "grad_norm": 0.7876295918843358, "learning_rate": 0.003, "loss": 4.136, "step": 7191 }, { "epoch": 0.07192, "grad_norm": 0.8009874929822709, "learning_rate": 0.003, "loss": 4.0981, "step": 7192 }, { "epoch": 0.07193, "grad_norm": 0.8465208583922181, "learning_rate": 0.003, "loss": 4.1224, "step": 7193 }, { "epoch": 0.07194, "grad_norm": 0.919178707859649, "learning_rate": 0.003, "loss": 4.1214, "step": 7194 }, { "epoch": 0.07195, "grad_norm": 1.0483728768665097, "learning_rate": 0.003, "loss": 4.1011, "step": 7195 }, { "epoch": 0.07196, "grad_norm": 1.0529880709135624, "learning_rate": 0.003, "loss": 4.1114, "step": 7196 }, { "epoch": 0.07197, "grad_norm": 0.8929728066328368, "learning_rate": 0.003, "loss": 4.1305, "step": 7197 }, { "epoch": 0.07198, "grad_norm": 0.8510797490053401, "learning_rate": 0.003, "loss": 4.0894, "step": 7198 }, { "epoch": 0.07199, "grad_norm": 0.7701251238667544, "learning_rate": 0.003, "loss": 4.0992, "step": 7199 }, { "epoch": 0.072, "grad_norm": 0.5992356375970112, "learning_rate": 0.003, "loss": 4.1285, "step": 7200 }, { "epoch": 0.07201, "grad_norm": 0.6127454964693743, "learning_rate": 0.003, "loss": 4.0914, "step": 7201 }, { "epoch": 0.07202, "grad_norm": 0.5750069845188995, "learning_rate": 0.003, "loss": 4.1225, "step": 7202 }, { "epoch": 0.07203, "grad_norm": 0.5644356454850563, "learning_rate": 0.003, "loss": 4.0882, "step": 7203 }, { "epoch": 0.07204, "grad_norm": 0.607948596229715, "learning_rate": 0.003, "loss": 4.1266, "step": 7204 }, { "epoch": 0.07205, "grad_norm": 0.575336251782507, "learning_rate": 0.003, "loss": 4.1212, "step": 7205 }, { "epoch": 0.07206, "grad_norm": 0.6206756982347824, "learning_rate": 0.003, "loss": 4.1002, "step": 7206 }, { "epoch": 0.07207, "grad_norm": 0.6744817053594698, "learning_rate": 0.003, "loss": 4.0991, "step": 7207 }, { "epoch": 0.07208, "grad_norm": 0.7220807677197786, "learning_rate": 0.003, "loss": 4.0955, "step": 7208 }, { "epoch": 0.07209, "grad_norm": 0.8377268449123019, "learning_rate": 0.003, "loss": 4.0889, "step": 7209 }, { "epoch": 0.0721, "grad_norm": 1.0721743343425194, "learning_rate": 0.003, "loss": 4.0992, "step": 7210 }, { "epoch": 0.07211, "grad_norm": 0.9661201701564104, "learning_rate": 0.003, "loss": 4.1127, "step": 7211 }, { "epoch": 0.07212, "grad_norm": 0.7690576613887181, "learning_rate": 0.003, "loss": 4.1186, "step": 7212 }, { "epoch": 0.07213, "grad_norm": 0.581038878715819, "learning_rate": 0.003, "loss": 4.1265, "step": 7213 }, { "epoch": 0.07214, "grad_norm": 0.6435911406628065, "learning_rate": 0.003, "loss": 4.0731, "step": 7214 }, { "epoch": 0.07215, "grad_norm": 0.8429785330520374, "learning_rate": 0.003, "loss": 4.1038, "step": 7215 }, { "epoch": 0.07216, "grad_norm": 1.0387688679965494, "learning_rate": 0.003, "loss": 4.1198, "step": 7216 }, { "epoch": 0.07217, "grad_norm": 1.0443475300961371, "learning_rate": 0.003, "loss": 4.1133, "step": 7217 }, { "epoch": 0.07218, "grad_norm": 0.8473614781155715, "learning_rate": 0.003, "loss": 4.112, "step": 7218 }, { "epoch": 0.07219, "grad_norm": 0.8315491811072131, "learning_rate": 0.003, "loss": 4.0906, "step": 7219 }, { "epoch": 0.0722, "grad_norm": 1.0296189666178754, "learning_rate": 0.003, "loss": 4.122, "step": 7220 }, { "epoch": 0.07221, "grad_norm": 1.058643563273535, "learning_rate": 0.003, "loss": 4.1232, "step": 7221 }, { "epoch": 0.07222, "grad_norm": 0.8979290937756685, "learning_rate": 0.003, "loss": 4.1093, "step": 7222 }, { "epoch": 0.07223, "grad_norm": 0.7842228557729956, "learning_rate": 0.003, "loss": 4.1202, "step": 7223 }, { "epoch": 0.07224, "grad_norm": 0.774784015172213, "learning_rate": 0.003, "loss": 4.0867, "step": 7224 }, { "epoch": 0.07225, "grad_norm": 0.77416880062085, "learning_rate": 0.003, "loss": 4.1137, "step": 7225 }, { "epoch": 0.07226, "grad_norm": 0.8594028278800052, "learning_rate": 0.003, "loss": 4.1196, "step": 7226 }, { "epoch": 0.07227, "grad_norm": 0.9339883501438095, "learning_rate": 0.003, "loss": 4.1026, "step": 7227 }, { "epoch": 0.07228, "grad_norm": 1.009238406341426, "learning_rate": 0.003, "loss": 4.1009, "step": 7228 }, { "epoch": 0.07229, "grad_norm": 0.9248237524022925, "learning_rate": 0.003, "loss": 4.1162, "step": 7229 }, { "epoch": 0.0723, "grad_norm": 0.7879348638628303, "learning_rate": 0.003, "loss": 4.0849, "step": 7230 }, { "epoch": 0.07231, "grad_norm": 0.8125590881523109, "learning_rate": 0.003, "loss": 4.1249, "step": 7231 }, { "epoch": 0.07232, "grad_norm": 0.7397244009053785, "learning_rate": 0.003, "loss": 4.1143, "step": 7232 }, { "epoch": 0.07233, "grad_norm": 0.8631670605854277, "learning_rate": 0.003, "loss": 4.1198, "step": 7233 }, { "epoch": 0.07234, "grad_norm": 0.9590374718321888, "learning_rate": 0.003, "loss": 4.0979, "step": 7234 }, { "epoch": 0.07235, "grad_norm": 1.1821140388987355, "learning_rate": 0.003, "loss": 4.152, "step": 7235 }, { "epoch": 0.07236, "grad_norm": 1.0912993445758663, "learning_rate": 0.003, "loss": 4.1468, "step": 7236 }, { "epoch": 0.07237, "grad_norm": 1.0186661817583151, "learning_rate": 0.003, "loss": 4.1103, "step": 7237 }, { "epoch": 0.07238, "grad_norm": 0.8827875731907374, "learning_rate": 0.003, "loss": 4.132, "step": 7238 }, { "epoch": 0.07239, "grad_norm": 1.0101708637839817, "learning_rate": 0.003, "loss": 4.1397, "step": 7239 }, { "epoch": 0.0724, "grad_norm": 0.9436996667559744, "learning_rate": 0.003, "loss": 4.0983, "step": 7240 }, { "epoch": 0.07241, "grad_norm": 0.924594063482519, "learning_rate": 0.003, "loss": 4.1432, "step": 7241 }, { "epoch": 0.07242, "grad_norm": 1.0534829249112354, "learning_rate": 0.003, "loss": 4.1018, "step": 7242 }, { "epoch": 0.07243, "grad_norm": 0.9856885879894006, "learning_rate": 0.003, "loss": 4.1349, "step": 7243 }, { "epoch": 0.07244, "grad_norm": 0.8968354162645473, "learning_rate": 0.003, "loss": 4.1167, "step": 7244 }, { "epoch": 0.07245, "grad_norm": 0.7432909812609705, "learning_rate": 0.003, "loss": 4.1134, "step": 7245 }, { "epoch": 0.07246, "grad_norm": 0.7581401567213042, "learning_rate": 0.003, "loss": 4.1226, "step": 7246 }, { "epoch": 0.07247, "grad_norm": 0.7827779191397117, "learning_rate": 0.003, "loss": 4.141, "step": 7247 }, { "epoch": 0.07248, "grad_norm": 0.8301715949958813, "learning_rate": 0.003, "loss": 4.1055, "step": 7248 }, { "epoch": 0.07249, "grad_norm": 0.8525741406819738, "learning_rate": 0.003, "loss": 4.0884, "step": 7249 }, { "epoch": 0.0725, "grad_norm": 0.8907335082596, "learning_rate": 0.003, "loss": 4.1023, "step": 7250 }, { "epoch": 0.07251, "grad_norm": 0.8802924249074248, "learning_rate": 0.003, "loss": 4.1195, "step": 7251 }, { "epoch": 0.07252, "grad_norm": 0.9494861985507634, "learning_rate": 0.003, "loss": 4.059, "step": 7252 }, { "epoch": 0.07253, "grad_norm": 0.9816725505864071, "learning_rate": 0.003, "loss": 4.0898, "step": 7253 }, { "epoch": 0.07254, "grad_norm": 1.0443814177090267, "learning_rate": 0.003, "loss": 4.139, "step": 7254 }, { "epoch": 0.07255, "grad_norm": 0.9747578800346165, "learning_rate": 0.003, "loss": 4.1331, "step": 7255 }, { "epoch": 0.07256, "grad_norm": 0.9623975370695055, "learning_rate": 0.003, "loss": 4.1353, "step": 7256 }, { "epoch": 0.07257, "grad_norm": 0.8844084755688933, "learning_rate": 0.003, "loss": 4.1195, "step": 7257 }, { "epoch": 0.07258, "grad_norm": 0.7873250461581538, "learning_rate": 0.003, "loss": 4.0847, "step": 7258 }, { "epoch": 0.07259, "grad_norm": 0.7048185689672526, "learning_rate": 0.003, "loss": 4.1117, "step": 7259 }, { "epoch": 0.0726, "grad_norm": 0.7212279689840713, "learning_rate": 0.003, "loss": 4.088, "step": 7260 }, { "epoch": 0.07261, "grad_norm": 0.6900512375900392, "learning_rate": 0.003, "loss": 4.1137, "step": 7261 }, { "epoch": 0.07262, "grad_norm": 0.7136129301757733, "learning_rate": 0.003, "loss": 4.1071, "step": 7262 }, { "epoch": 0.07263, "grad_norm": 0.7469633157532889, "learning_rate": 0.003, "loss": 4.1134, "step": 7263 }, { "epoch": 0.07264, "grad_norm": 0.8012211214960027, "learning_rate": 0.003, "loss": 4.0687, "step": 7264 }, { "epoch": 0.07265, "grad_norm": 0.7552911178426823, "learning_rate": 0.003, "loss": 4.1091, "step": 7265 }, { "epoch": 0.07266, "grad_norm": 0.7077347824210926, "learning_rate": 0.003, "loss": 4.1258, "step": 7266 }, { "epoch": 0.07267, "grad_norm": 0.6760989582415581, "learning_rate": 0.003, "loss": 4.1103, "step": 7267 }, { "epoch": 0.07268, "grad_norm": 0.7437332060079201, "learning_rate": 0.003, "loss": 4.118, "step": 7268 }, { "epoch": 0.07269, "grad_norm": 0.9444827297437564, "learning_rate": 0.003, "loss": 4.1084, "step": 7269 }, { "epoch": 0.0727, "grad_norm": 1.1220594233552328, "learning_rate": 0.003, "loss": 4.0989, "step": 7270 }, { "epoch": 0.07271, "grad_norm": 0.8200379597299912, "learning_rate": 0.003, "loss": 4.1044, "step": 7271 }, { "epoch": 0.07272, "grad_norm": 0.7596490807886606, "learning_rate": 0.003, "loss": 4.0966, "step": 7272 }, { "epoch": 0.07273, "grad_norm": 0.7884580146314756, "learning_rate": 0.003, "loss": 4.1131, "step": 7273 }, { "epoch": 0.07274, "grad_norm": 0.7179271579858106, "learning_rate": 0.003, "loss": 4.0728, "step": 7274 }, { "epoch": 0.07275, "grad_norm": 0.6409693363218573, "learning_rate": 0.003, "loss": 4.0857, "step": 7275 }, { "epoch": 0.07276, "grad_norm": 0.561214096824011, "learning_rate": 0.003, "loss": 4.0634, "step": 7276 }, { "epoch": 0.07277, "grad_norm": 0.5256694600157773, "learning_rate": 0.003, "loss": 4.0947, "step": 7277 }, { "epoch": 0.07278, "grad_norm": 0.513949681102309, "learning_rate": 0.003, "loss": 4.1015, "step": 7278 }, { "epoch": 0.07279, "grad_norm": 0.49570529414604636, "learning_rate": 0.003, "loss": 4.0803, "step": 7279 }, { "epoch": 0.0728, "grad_norm": 0.6204492806369919, "learning_rate": 0.003, "loss": 4.0658, "step": 7280 }, { "epoch": 0.07281, "grad_norm": 0.9083869384535228, "learning_rate": 0.003, "loss": 4.0885, "step": 7281 }, { "epoch": 0.07282, "grad_norm": 1.2773527610872655, "learning_rate": 0.003, "loss": 4.1165, "step": 7282 }, { "epoch": 0.07283, "grad_norm": 0.7627102174193735, "learning_rate": 0.003, "loss": 4.1234, "step": 7283 }, { "epoch": 0.07284, "grad_norm": 0.6303760922439519, "learning_rate": 0.003, "loss": 4.0801, "step": 7284 }, { "epoch": 0.07285, "grad_norm": 0.703121523726995, "learning_rate": 0.003, "loss": 4.1106, "step": 7285 }, { "epoch": 0.07286, "grad_norm": 0.7516732999345842, "learning_rate": 0.003, "loss": 4.0849, "step": 7286 }, { "epoch": 0.07287, "grad_norm": 0.8402680521780728, "learning_rate": 0.003, "loss": 4.1095, "step": 7287 }, { "epoch": 0.07288, "grad_norm": 0.8639792163628891, "learning_rate": 0.003, "loss": 4.0897, "step": 7288 }, { "epoch": 0.07289, "grad_norm": 0.9106770231204078, "learning_rate": 0.003, "loss": 4.111, "step": 7289 }, { "epoch": 0.0729, "grad_norm": 0.8569039805553732, "learning_rate": 0.003, "loss": 4.133, "step": 7290 }, { "epoch": 0.07291, "grad_norm": 0.8741502241373251, "learning_rate": 0.003, "loss": 4.0951, "step": 7291 }, { "epoch": 0.07292, "grad_norm": 0.9093181247050962, "learning_rate": 0.003, "loss": 4.1104, "step": 7292 }, { "epoch": 0.07293, "grad_norm": 1.0546113868958757, "learning_rate": 0.003, "loss": 4.1031, "step": 7293 }, { "epoch": 0.07294, "grad_norm": 1.0193664307418668, "learning_rate": 0.003, "loss": 4.092, "step": 7294 }, { "epoch": 0.07295, "grad_norm": 1.0910195655187844, "learning_rate": 0.003, "loss": 4.0877, "step": 7295 }, { "epoch": 0.07296, "grad_norm": 0.8335499181973589, "learning_rate": 0.003, "loss": 4.0951, "step": 7296 }, { "epoch": 0.07297, "grad_norm": 0.6580676089467326, "learning_rate": 0.003, "loss": 4.126, "step": 7297 }, { "epoch": 0.07298, "grad_norm": 0.5832643079122204, "learning_rate": 0.003, "loss": 4.0715, "step": 7298 }, { "epoch": 0.07299, "grad_norm": 0.6303194883819615, "learning_rate": 0.003, "loss": 4.0988, "step": 7299 }, { "epoch": 0.073, "grad_norm": 0.6469636223827426, "learning_rate": 0.003, "loss": 4.0847, "step": 7300 }, { "epoch": 0.07301, "grad_norm": 0.6597994798980442, "learning_rate": 0.003, "loss": 4.0689, "step": 7301 }, { "epoch": 0.07302, "grad_norm": 0.6982812827344924, "learning_rate": 0.003, "loss": 4.0771, "step": 7302 }, { "epoch": 0.07303, "grad_norm": 0.7027447626804699, "learning_rate": 0.003, "loss": 4.1082, "step": 7303 }, { "epoch": 0.07304, "grad_norm": 0.7898566798558829, "learning_rate": 0.003, "loss": 4.0905, "step": 7304 }, { "epoch": 0.07305, "grad_norm": 0.8619601714981096, "learning_rate": 0.003, "loss": 4.0858, "step": 7305 }, { "epoch": 0.07306, "grad_norm": 1.0874296421242524, "learning_rate": 0.003, "loss": 4.1274, "step": 7306 }, { "epoch": 0.07307, "grad_norm": 1.1513689835246426, "learning_rate": 0.003, "loss": 4.1405, "step": 7307 }, { "epoch": 0.07308, "grad_norm": 0.8638122639112095, "learning_rate": 0.003, "loss": 4.1022, "step": 7308 }, { "epoch": 0.07309, "grad_norm": 0.8255250333131889, "learning_rate": 0.003, "loss": 4.1088, "step": 7309 }, { "epoch": 0.0731, "grad_norm": 0.9337338907492057, "learning_rate": 0.003, "loss": 4.0683, "step": 7310 }, { "epoch": 0.07311, "grad_norm": 0.9258041196216139, "learning_rate": 0.003, "loss": 4.1147, "step": 7311 }, { "epoch": 0.07312, "grad_norm": 0.951937894172149, "learning_rate": 0.003, "loss": 4.1268, "step": 7312 }, { "epoch": 0.07313, "grad_norm": 1.0078253665907535, "learning_rate": 0.003, "loss": 4.1176, "step": 7313 }, { "epoch": 0.07314, "grad_norm": 0.901224372401486, "learning_rate": 0.003, "loss": 4.1279, "step": 7314 }, { "epoch": 0.07315, "grad_norm": 0.7944852254095126, "learning_rate": 0.003, "loss": 4.1111, "step": 7315 }, { "epoch": 0.07316, "grad_norm": 0.783421376743893, "learning_rate": 0.003, "loss": 4.1285, "step": 7316 }, { "epoch": 0.07317, "grad_norm": 0.8368936906337524, "learning_rate": 0.003, "loss": 4.1126, "step": 7317 }, { "epoch": 0.07318, "grad_norm": 1.033174150909009, "learning_rate": 0.003, "loss": 4.1026, "step": 7318 }, { "epoch": 0.07319, "grad_norm": 1.27108174493455, "learning_rate": 0.003, "loss": 4.1282, "step": 7319 }, { "epoch": 0.0732, "grad_norm": 0.7406462978297551, "learning_rate": 0.003, "loss": 4.0973, "step": 7320 }, { "epoch": 0.07321, "grad_norm": 0.7025965454631923, "learning_rate": 0.003, "loss": 4.1042, "step": 7321 }, { "epoch": 0.07322, "grad_norm": 0.8144605025587333, "learning_rate": 0.003, "loss": 4.1241, "step": 7322 }, { "epoch": 0.07323, "grad_norm": 0.8053334048255597, "learning_rate": 0.003, "loss": 4.1074, "step": 7323 }, { "epoch": 0.07324, "grad_norm": 0.775372831647096, "learning_rate": 0.003, "loss": 4.104, "step": 7324 }, { "epoch": 0.07325, "grad_norm": 0.8357997029570158, "learning_rate": 0.003, "loss": 4.0848, "step": 7325 }, { "epoch": 0.07326, "grad_norm": 0.8489307022162469, "learning_rate": 0.003, "loss": 4.1221, "step": 7326 }, { "epoch": 0.07327, "grad_norm": 0.8228446709367002, "learning_rate": 0.003, "loss": 4.1236, "step": 7327 }, { "epoch": 0.07328, "grad_norm": 0.7838796840554372, "learning_rate": 0.003, "loss": 4.0788, "step": 7328 }, { "epoch": 0.07329, "grad_norm": 0.7251399802311324, "learning_rate": 0.003, "loss": 4.1018, "step": 7329 }, { "epoch": 0.0733, "grad_norm": 0.766770759186389, "learning_rate": 0.003, "loss": 4.0919, "step": 7330 }, { "epoch": 0.07331, "grad_norm": 0.943369746260282, "learning_rate": 0.003, "loss": 4.0916, "step": 7331 }, { "epoch": 0.07332, "grad_norm": 1.106302603979319, "learning_rate": 0.003, "loss": 4.1229, "step": 7332 }, { "epoch": 0.07333, "grad_norm": 0.9429802103066166, "learning_rate": 0.003, "loss": 4.1125, "step": 7333 }, { "epoch": 0.07334, "grad_norm": 1.0320307959064938, "learning_rate": 0.003, "loss": 4.1108, "step": 7334 }, { "epoch": 0.07335, "grad_norm": 0.9938214659378108, "learning_rate": 0.003, "loss": 4.0842, "step": 7335 }, { "epoch": 0.07336, "grad_norm": 1.0914354610999535, "learning_rate": 0.003, "loss": 4.1311, "step": 7336 }, { "epoch": 0.07337, "grad_norm": 1.1269724240813392, "learning_rate": 0.003, "loss": 4.1287, "step": 7337 }, { "epoch": 0.07338, "grad_norm": 0.8570775179663218, "learning_rate": 0.003, "loss": 4.1056, "step": 7338 }, { "epoch": 0.07339, "grad_norm": 0.8031931079538867, "learning_rate": 0.003, "loss": 4.138, "step": 7339 }, { "epoch": 0.0734, "grad_norm": 0.8935576371259307, "learning_rate": 0.003, "loss": 4.0947, "step": 7340 }, { "epoch": 0.07341, "grad_norm": 0.9083095502174247, "learning_rate": 0.003, "loss": 4.1152, "step": 7341 }, { "epoch": 0.07342, "grad_norm": 0.9576486900412259, "learning_rate": 0.003, "loss": 4.1373, "step": 7342 }, { "epoch": 0.07343, "grad_norm": 0.9182147356345189, "learning_rate": 0.003, "loss": 4.1243, "step": 7343 }, { "epoch": 0.07344, "grad_norm": 0.8421322703250478, "learning_rate": 0.003, "loss": 4.1232, "step": 7344 }, { "epoch": 0.07345, "grad_norm": 0.7537817256778269, "learning_rate": 0.003, "loss": 4.0943, "step": 7345 }, { "epoch": 0.07346, "grad_norm": 0.7033688234808623, "learning_rate": 0.003, "loss": 4.0789, "step": 7346 }, { "epoch": 0.07347, "grad_norm": 0.7413990535788959, "learning_rate": 0.003, "loss": 4.105, "step": 7347 }, { "epoch": 0.07348, "grad_norm": 0.7773942391883343, "learning_rate": 0.003, "loss": 4.1073, "step": 7348 }, { "epoch": 0.07349, "grad_norm": 0.9329173997319489, "learning_rate": 0.003, "loss": 4.1157, "step": 7349 }, { "epoch": 0.0735, "grad_norm": 1.093903209357276, "learning_rate": 0.003, "loss": 4.1047, "step": 7350 }, { "epoch": 0.07351, "grad_norm": 0.8109483676035741, "learning_rate": 0.003, "loss": 4.0939, "step": 7351 }, { "epoch": 0.07352, "grad_norm": 0.7939494918683481, "learning_rate": 0.003, "loss": 4.114, "step": 7352 }, { "epoch": 0.07353, "grad_norm": 0.7869344084686208, "learning_rate": 0.003, "loss": 4.1333, "step": 7353 }, { "epoch": 0.07354, "grad_norm": 0.7788006156502201, "learning_rate": 0.003, "loss": 4.0946, "step": 7354 }, { "epoch": 0.07355, "grad_norm": 0.8552053349081179, "learning_rate": 0.003, "loss": 4.1168, "step": 7355 }, { "epoch": 0.07356, "grad_norm": 1.0518730489917372, "learning_rate": 0.003, "loss": 4.1226, "step": 7356 }, { "epoch": 0.07357, "grad_norm": 1.3412824434070465, "learning_rate": 0.003, "loss": 4.1117, "step": 7357 }, { "epoch": 0.07358, "grad_norm": 0.9208430362293964, "learning_rate": 0.003, "loss": 4.1214, "step": 7358 }, { "epoch": 0.07359, "grad_norm": 0.8344769448824529, "learning_rate": 0.003, "loss": 4.1195, "step": 7359 }, { "epoch": 0.0736, "grad_norm": 0.6793410562113039, "learning_rate": 0.003, "loss": 4.1272, "step": 7360 }, { "epoch": 0.07361, "grad_norm": 0.6267991527813198, "learning_rate": 0.003, "loss": 4.0986, "step": 7361 }, { "epoch": 0.07362, "grad_norm": 0.690161715844307, "learning_rate": 0.003, "loss": 4.1237, "step": 7362 }, { "epoch": 0.07363, "grad_norm": 0.7869927998336663, "learning_rate": 0.003, "loss": 4.0909, "step": 7363 }, { "epoch": 0.07364, "grad_norm": 0.8027534998077103, "learning_rate": 0.003, "loss": 4.1247, "step": 7364 }, { "epoch": 0.07365, "grad_norm": 0.6955286691239817, "learning_rate": 0.003, "loss": 4.0722, "step": 7365 }, { "epoch": 0.07366, "grad_norm": 0.7372804832228109, "learning_rate": 0.003, "loss": 4.1043, "step": 7366 }, { "epoch": 0.07367, "grad_norm": 0.6394799490289566, "learning_rate": 0.003, "loss": 4.1128, "step": 7367 }, { "epoch": 0.07368, "grad_norm": 0.509023284762404, "learning_rate": 0.003, "loss": 4.1263, "step": 7368 }, { "epoch": 0.07369, "grad_norm": 0.5616436196591941, "learning_rate": 0.003, "loss": 4.0731, "step": 7369 }, { "epoch": 0.0737, "grad_norm": 0.6425475395508851, "learning_rate": 0.003, "loss": 4.1058, "step": 7370 }, { "epoch": 0.07371, "grad_norm": 0.7368588731675243, "learning_rate": 0.003, "loss": 4.1263, "step": 7371 }, { "epoch": 0.07372, "grad_norm": 0.9085719201340855, "learning_rate": 0.003, "loss": 4.1092, "step": 7372 }, { "epoch": 0.07373, "grad_norm": 1.2408545702032314, "learning_rate": 0.003, "loss": 4.1187, "step": 7373 }, { "epoch": 0.07374, "grad_norm": 0.7374902067189273, "learning_rate": 0.003, "loss": 4.0977, "step": 7374 }, { "epoch": 0.07375, "grad_norm": 0.6163993980166736, "learning_rate": 0.003, "loss": 4.1025, "step": 7375 }, { "epoch": 0.07376, "grad_norm": 0.6423268145367569, "learning_rate": 0.003, "loss": 4.1068, "step": 7376 }, { "epoch": 0.07377, "grad_norm": 0.7533167250559268, "learning_rate": 0.003, "loss": 4.1055, "step": 7377 }, { "epoch": 0.07378, "grad_norm": 0.8584299528911417, "learning_rate": 0.003, "loss": 4.1159, "step": 7378 }, { "epoch": 0.07379, "grad_norm": 1.0096294225047744, "learning_rate": 0.003, "loss": 4.0922, "step": 7379 }, { "epoch": 0.0738, "grad_norm": 1.0223154674326658, "learning_rate": 0.003, "loss": 4.0808, "step": 7380 }, { "epoch": 0.07381, "grad_norm": 0.8907126146182821, "learning_rate": 0.003, "loss": 4.0911, "step": 7381 }, { "epoch": 0.07382, "grad_norm": 0.7207053071921802, "learning_rate": 0.003, "loss": 4.1337, "step": 7382 }, { "epoch": 0.07383, "grad_norm": 0.9145179387624196, "learning_rate": 0.003, "loss": 4.1137, "step": 7383 }, { "epoch": 0.07384, "grad_norm": 1.0098905122982873, "learning_rate": 0.003, "loss": 4.0914, "step": 7384 }, { "epoch": 0.07385, "grad_norm": 1.198216983974359, "learning_rate": 0.003, "loss": 4.1127, "step": 7385 }, { "epoch": 0.07386, "grad_norm": 0.7055995485988885, "learning_rate": 0.003, "loss": 4.1143, "step": 7386 }, { "epoch": 0.07387, "grad_norm": 0.6341184291168993, "learning_rate": 0.003, "loss": 4.0867, "step": 7387 }, { "epoch": 0.07388, "grad_norm": 0.6305102045443429, "learning_rate": 0.003, "loss": 4.1095, "step": 7388 }, { "epoch": 0.07389, "grad_norm": 0.7729018243590356, "learning_rate": 0.003, "loss": 4.0967, "step": 7389 }, { "epoch": 0.0739, "grad_norm": 1.046784353921616, "learning_rate": 0.003, "loss": 4.093, "step": 7390 }, { "epoch": 0.07391, "grad_norm": 1.003334204394043, "learning_rate": 0.003, "loss": 4.1185, "step": 7391 }, { "epoch": 0.07392, "grad_norm": 0.867653951162738, "learning_rate": 0.003, "loss": 4.0738, "step": 7392 }, { "epoch": 0.07393, "grad_norm": 0.7970634630521443, "learning_rate": 0.003, "loss": 4.1089, "step": 7393 }, { "epoch": 0.07394, "grad_norm": 0.9057360666683115, "learning_rate": 0.003, "loss": 4.1067, "step": 7394 }, { "epoch": 0.07395, "grad_norm": 0.9547098856968925, "learning_rate": 0.003, "loss": 4.0904, "step": 7395 }, { "epoch": 0.07396, "grad_norm": 0.9234933056161622, "learning_rate": 0.003, "loss": 4.1095, "step": 7396 }, { "epoch": 0.07397, "grad_norm": 1.0368747780491556, "learning_rate": 0.003, "loss": 4.1127, "step": 7397 }, { "epoch": 0.07398, "grad_norm": 1.0403397267434613, "learning_rate": 0.003, "loss": 4.1108, "step": 7398 }, { "epoch": 0.07399, "grad_norm": 0.9544666764070975, "learning_rate": 0.003, "loss": 4.0884, "step": 7399 }, { "epoch": 0.074, "grad_norm": 0.8872295611618851, "learning_rate": 0.003, "loss": 4.1285, "step": 7400 }, { "epoch": 0.07401, "grad_norm": 0.9340892887170521, "learning_rate": 0.003, "loss": 4.0935, "step": 7401 }, { "epoch": 0.07402, "grad_norm": 0.8784569523984895, "learning_rate": 0.003, "loss": 4.1129, "step": 7402 }, { "epoch": 0.07403, "grad_norm": 0.8660676350594653, "learning_rate": 0.003, "loss": 4.1211, "step": 7403 }, { "epoch": 0.07404, "grad_norm": 0.9009419907324966, "learning_rate": 0.003, "loss": 4.1556, "step": 7404 }, { "epoch": 0.07405, "grad_norm": 0.8106715007698777, "learning_rate": 0.003, "loss": 4.1102, "step": 7405 }, { "epoch": 0.07406, "grad_norm": 0.807586011085066, "learning_rate": 0.003, "loss": 4.1109, "step": 7406 }, { "epoch": 0.07407, "grad_norm": 0.8628667672682003, "learning_rate": 0.003, "loss": 4.1302, "step": 7407 }, { "epoch": 0.07408, "grad_norm": 0.9415613388154968, "learning_rate": 0.003, "loss": 4.1262, "step": 7408 }, { "epoch": 0.07409, "grad_norm": 0.9481965047839528, "learning_rate": 0.003, "loss": 4.1119, "step": 7409 }, { "epoch": 0.0741, "grad_norm": 1.0223140305985579, "learning_rate": 0.003, "loss": 4.1325, "step": 7410 }, { "epoch": 0.07411, "grad_norm": 1.1261250470212472, "learning_rate": 0.003, "loss": 4.1286, "step": 7411 }, { "epoch": 0.07412, "grad_norm": 0.8452821990703466, "learning_rate": 0.003, "loss": 4.1093, "step": 7412 }, { "epoch": 0.07413, "grad_norm": 0.7070647319303838, "learning_rate": 0.003, "loss": 4.106, "step": 7413 }, { "epoch": 0.07414, "grad_norm": 0.6602409738515731, "learning_rate": 0.003, "loss": 4.1058, "step": 7414 }, { "epoch": 0.07415, "grad_norm": 0.6813711085208377, "learning_rate": 0.003, "loss": 4.1048, "step": 7415 }, { "epoch": 0.07416, "grad_norm": 0.7813248550198105, "learning_rate": 0.003, "loss": 4.0889, "step": 7416 }, { "epoch": 0.07417, "grad_norm": 0.7903284361595316, "learning_rate": 0.003, "loss": 4.111, "step": 7417 }, { "epoch": 0.07418, "grad_norm": 0.8776997591398797, "learning_rate": 0.003, "loss": 4.0908, "step": 7418 }, { "epoch": 0.07419, "grad_norm": 0.9346374935990277, "learning_rate": 0.003, "loss": 4.0782, "step": 7419 }, { "epoch": 0.0742, "grad_norm": 0.8167496983343525, "learning_rate": 0.003, "loss": 4.1193, "step": 7420 }, { "epoch": 0.07421, "grad_norm": 0.7740749507997752, "learning_rate": 0.003, "loss": 4.0893, "step": 7421 }, { "epoch": 0.07422, "grad_norm": 0.7797923880019133, "learning_rate": 0.003, "loss": 4.111, "step": 7422 }, { "epoch": 0.07423, "grad_norm": 0.7246765372540556, "learning_rate": 0.003, "loss": 4.0866, "step": 7423 }, { "epoch": 0.07424, "grad_norm": 0.7739364594038591, "learning_rate": 0.003, "loss": 4.1145, "step": 7424 }, { "epoch": 0.07425, "grad_norm": 0.8561033384244009, "learning_rate": 0.003, "loss": 4.0791, "step": 7425 }, { "epoch": 0.07426, "grad_norm": 1.0873991072382838, "learning_rate": 0.003, "loss": 4.0837, "step": 7426 }, { "epoch": 0.07427, "grad_norm": 1.0969759409269335, "learning_rate": 0.003, "loss": 4.0897, "step": 7427 }, { "epoch": 0.07428, "grad_norm": 0.7321830844983641, "learning_rate": 0.003, "loss": 4.1327, "step": 7428 }, { "epoch": 0.07429, "grad_norm": 0.6644474298927815, "learning_rate": 0.003, "loss": 4.1055, "step": 7429 }, { "epoch": 0.0743, "grad_norm": 0.6883336057236158, "learning_rate": 0.003, "loss": 4.0949, "step": 7430 }, { "epoch": 0.07431, "grad_norm": 0.7448399088605852, "learning_rate": 0.003, "loss": 4.0903, "step": 7431 }, { "epoch": 0.07432, "grad_norm": 0.8377015855878724, "learning_rate": 0.003, "loss": 4.1129, "step": 7432 }, { "epoch": 0.07433, "grad_norm": 0.8455466117001035, "learning_rate": 0.003, "loss": 4.0954, "step": 7433 }, { "epoch": 0.07434, "grad_norm": 0.8038998713277352, "learning_rate": 0.003, "loss": 4.069, "step": 7434 }, { "epoch": 0.07435, "grad_norm": 0.7628824415481451, "learning_rate": 0.003, "loss": 4.1063, "step": 7435 }, { "epoch": 0.07436, "grad_norm": 0.8471666681896792, "learning_rate": 0.003, "loss": 4.1293, "step": 7436 }, { "epoch": 0.07437, "grad_norm": 1.022193249171131, "learning_rate": 0.003, "loss": 4.0747, "step": 7437 }, { "epoch": 0.07438, "grad_norm": 0.954279749528698, "learning_rate": 0.003, "loss": 4.1191, "step": 7438 }, { "epoch": 0.07439, "grad_norm": 0.9359744466674262, "learning_rate": 0.003, "loss": 4.1056, "step": 7439 }, { "epoch": 0.0744, "grad_norm": 0.985750832333834, "learning_rate": 0.003, "loss": 4.1168, "step": 7440 }, { "epoch": 0.07441, "grad_norm": 1.0387568082669436, "learning_rate": 0.003, "loss": 4.1552, "step": 7441 }, { "epoch": 0.07442, "grad_norm": 0.9354486578980633, "learning_rate": 0.003, "loss": 4.1282, "step": 7442 }, { "epoch": 0.07443, "grad_norm": 0.8937562700947757, "learning_rate": 0.003, "loss": 4.1114, "step": 7443 }, { "epoch": 0.07444, "grad_norm": 0.7862746074501281, "learning_rate": 0.003, "loss": 4.1268, "step": 7444 }, { "epoch": 0.07445, "grad_norm": 0.7580214165836215, "learning_rate": 0.003, "loss": 4.1077, "step": 7445 }, { "epoch": 0.07446, "grad_norm": 0.826490781957247, "learning_rate": 0.003, "loss": 4.0912, "step": 7446 }, { "epoch": 0.07447, "grad_norm": 0.78561092743045, "learning_rate": 0.003, "loss": 4.0969, "step": 7447 }, { "epoch": 0.07448, "grad_norm": 0.8720738755090752, "learning_rate": 0.003, "loss": 4.0934, "step": 7448 }, { "epoch": 0.07449, "grad_norm": 1.1356579541218612, "learning_rate": 0.003, "loss": 4.1163, "step": 7449 }, { "epoch": 0.0745, "grad_norm": 1.194362940520485, "learning_rate": 0.003, "loss": 4.1263, "step": 7450 }, { "epoch": 0.07451, "grad_norm": 0.8425811609172174, "learning_rate": 0.003, "loss": 4.115, "step": 7451 }, { "epoch": 0.07452, "grad_norm": 0.9146392058972959, "learning_rate": 0.003, "loss": 4.0891, "step": 7452 }, { "epoch": 0.07453, "grad_norm": 1.0026955605148151, "learning_rate": 0.003, "loss": 4.1274, "step": 7453 }, { "epoch": 0.07454, "grad_norm": 1.0112038508522885, "learning_rate": 0.003, "loss": 4.1031, "step": 7454 }, { "epoch": 0.07455, "grad_norm": 1.168191155504265, "learning_rate": 0.003, "loss": 4.1204, "step": 7455 }, { "epoch": 0.07456, "grad_norm": 0.7449918001899162, "learning_rate": 0.003, "loss": 4.1173, "step": 7456 }, { "epoch": 0.07457, "grad_norm": 0.7986467621349783, "learning_rate": 0.003, "loss": 4.1105, "step": 7457 }, { "epoch": 0.07458, "grad_norm": 0.6606779110991354, "learning_rate": 0.003, "loss": 4.1158, "step": 7458 }, { "epoch": 0.07459, "grad_norm": 0.74600487207374, "learning_rate": 0.003, "loss": 4.0996, "step": 7459 }, { "epoch": 0.0746, "grad_norm": 0.7705606291540048, "learning_rate": 0.003, "loss": 4.1096, "step": 7460 }, { "epoch": 0.07461, "grad_norm": 0.8369060675928409, "learning_rate": 0.003, "loss": 4.1182, "step": 7461 }, { "epoch": 0.07462, "grad_norm": 0.9604504411667463, "learning_rate": 0.003, "loss": 4.0834, "step": 7462 }, { "epoch": 0.07463, "grad_norm": 1.018164117686845, "learning_rate": 0.003, "loss": 4.1448, "step": 7463 }, { "epoch": 0.07464, "grad_norm": 0.8462415172519894, "learning_rate": 0.003, "loss": 4.0874, "step": 7464 }, { "epoch": 0.07465, "grad_norm": 0.7280333713094582, "learning_rate": 0.003, "loss": 4.1067, "step": 7465 }, { "epoch": 0.07466, "grad_norm": 0.6527117875066514, "learning_rate": 0.003, "loss": 4.1199, "step": 7466 }, { "epoch": 0.07467, "grad_norm": 0.7283692800044493, "learning_rate": 0.003, "loss": 4.0912, "step": 7467 }, { "epoch": 0.07468, "grad_norm": 0.7286232701262282, "learning_rate": 0.003, "loss": 4.1121, "step": 7468 }, { "epoch": 0.07469, "grad_norm": 0.7554001099044895, "learning_rate": 0.003, "loss": 4.0758, "step": 7469 }, { "epoch": 0.0747, "grad_norm": 0.6494769448278457, "learning_rate": 0.003, "loss": 4.1238, "step": 7470 }, { "epoch": 0.07471, "grad_norm": 0.582239848281011, "learning_rate": 0.003, "loss": 4.1034, "step": 7471 }, { "epoch": 0.07472, "grad_norm": 0.6054171618519452, "learning_rate": 0.003, "loss": 4.1059, "step": 7472 }, { "epoch": 0.07473, "grad_norm": 0.5800597140442034, "learning_rate": 0.003, "loss": 4.1072, "step": 7473 }, { "epoch": 0.07474, "grad_norm": 0.6418775672195469, "learning_rate": 0.003, "loss": 4.0892, "step": 7474 }, { "epoch": 0.07475, "grad_norm": 0.8209219098567517, "learning_rate": 0.003, "loss": 4.1065, "step": 7475 }, { "epoch": 0.07476, "grad_norm": 1.0927372326738478, "learning_rate": 0.003, "loss": 4.0789, "step": 7476 }, { "epoch": 0.07477, "grad_norm": 1.1813159129321458, "learning_rate": 0.003, "loss": 4.1158, "step": 7477 }, { "epoch": 0.07478, "grad_norm": 0.7297682403726699, "learning_rate": 0.003, "loss": 4.0565, "step": 7478 }, { "epoch": 0.07479, "grad_norm": 0.6410942452556853, "learning_rate": 0.003, "loss": 4.074, "step": 7479 }, { "epoch": 0.0748, "grad_norm": 0.8698329147222141, "learning_rate": 0.003, "loss": 4.0961, "step": 7480 }, { "epoch": 0.07481, "grad_norm": 1.0082944565315157, "learning_rate": 0.003, "loss": 4.1108, "step": 7481 }, { "epoch": 0.07482, "grad_norm": 0.9628391840892444, "learning_rate": 0.003, "loss": 4.1099, "step": 7482 }, { "epoch": 0.07483, "grad_norm": 0.8641925302412802, "learning_rate": 0.003, "loss": 4.1281, "step": 7483 }, { "epoch": 0.07484, "grad_norm": 0.7533282082846949, "learning_rate": 0.003, "loss": 4.1103, "step": 7484 }, { "epoch": 0.07485, "grad_norm": 0.7703693706278185, "learning_rate": 0.003, "loss": 4.1, "step": 7485 }, { "epoch": 0.07486, "grad_norm": 0.8291299861969627, "learning_rate": 0.003, "loss": 4.0883, "step": 7486 }, { "epoch": 0.07487, "grad_norm": 0.9151584554564175, "learning_rate": 0.003, "loss": 4.107, "step": 7487 }, { "epoch": 0.07488, "grad_norm": 1.0172079023902525, "learning_rate": 0.003, "loss": 4.1177, "step": 7488 }, { "epoch": 0.07489, "grad_norm": 0.985794974411023, "learning_rate": 0.003, "loss": 4.1284, "step": 7489 }, { "epoch": 0.0749, "grad_norm": 0.7878742848581212, "learning_rate": 0.003, "loss": 4.093, "step": 7490 }, { "epoch": 0.07491, "grad_norm": 0.7562126231512927, "learning_rate": 0.003, "loss": 4.0998, "step": 7491 }, { "epoch": 0.07492, "grad_norm": 0.7316445879371943, "learning_rate": 0.003, "loss": 4.1174, "step": 7492 }, { "epoch": 0.07493, "grad_norm": 0.7015263206527975, "learning_rate": 0.003, "loss": 4.1081, "step": 7493 }, { "epoch": 0.07494, "grad_norm": 0.7094674720974151, "learning_rate": 0.003, "loss": 4.1219, "step": 7494 }, { "epoch": 0.07495, "grad_norm": 0.8005067312090948, "learning_rate": 0.003, "loss": 4.0987, "step": 7495 }, { "epoch": 0.07496, "grad_norm": 1.0221046723780693, "learning_rate": 0.003, "loss": 4.0976, "step": 7496 }, { "epoch": 0.07497, "grad_norm": 1.1782769790208016, "learning_rate": 0.003, "loss": 4.1157, "step": 7497 }, { "epoch": 0.07498, "grad_norm": 0.8031446202765408, "learning_rate": 0.003, "loss": 4.0992, "step": 7498 }, { "epoch": 0.07499, "grad_norm": 0.7861635761156994, "learning_rate": 0.003, "loss": 4.103, "step": 7499 }, { "epoch": 0.075, "grad_norm": 0.7398205403713403, "learning_rate": 0.003, "loss": 4.1212, "step": 7500 }, { "epoch": 0.07501, "grad_norm": 0.8734378274009751, "learning_rate": 0.003, "loss": 4.0961, "step": 7501 }, { "epoch": 0.07502, "grad_norm": 0.8830879959651403, "learning_rate": 0.003, "loss": 4.095, "step": 7502 }, { "epoch": 0.07503, "grad_norm": 0.8281346250649004, "learning_rate": 0.003, "loss": 4.0943, "step": 7503 }, { "epoch": 0.07504, "grad_norm": 0.8072671794855871, "learning_rate": 0.003, "loss": 4.0967, "step": 7504 }, { "epoch": 0.07505, "grad_norm": 0.8666820809776766, "learning_rate": 0.003, "loss": 4.0843, "step": 7505 }, { "epoch": 0.07506, "grad_norm": 1.0055552931832379, "learning_rate": 0.003, "loss": 4.101, "step": 7506 }, { "epoch": 0.07507, "grad_norm": 0.9690975893829946, "learning_rate": 0.003, "loss": 4.0889, "step": 7507 }, { "epoch": 0.07508, "grad_norm": 0.9561250009010052, "learning_rate": 0.003, "loss": 4.1303, "step": 7508 }, { "epoch": 0.07509, "grad_norm": 1.0877559050066379, "learning_rate": 0.003, "loss": 4.1123, "step": 7509 }, { "epoch": 0.0751, "grad_norm": 1.0027154307652972, "learning_rate": 0.003, "loss": 4.1077, "step": 7510 }, { "epoch": 0.07511, "grad_norm": 1.0165739974701828, "learning_rate": 0.003, "loss": 4.1, "step": 7511 }, { "epoch": 0.07512, "grad_norm": 1.0057419295722907, "learning_rate": 0.003, "loss": 4.1394, "step": 7512 }, { "epoch": 0.07513, "grad_norm": 0.9433174355296526, "learning_rate": 0.003, "loss": 4.12, "step": 7513 }, { "epoch": 0.07514, "grad_norm": 1.0313875668855543, "learning_rate": 0.003, "loss": 4.0825, "step": 7514 }, { "epoch": 0.07515, "grad_norm": 1.0147222353644576, "learning_rate": 0.003, "loss": 4.1449, "step": 7515 }, { "epoch": 0.07516, "grad_norm": 0.9245083215799488, "learning_rate": 0.003, "loss": 4.0957, "step": 7516 }, { "epoch": 0.07517, "grad_norm": 0.8259475443651306, "learning_rate": 0.003, "loss": 4.1198, "step": 7517 }, { "epoch": 0.07518, "grad_norm": 0.7587280215699812, "learning_rate": 0.003, "loss": 4.0948, "step": 7518 }, { "epoch": 0.07519, "grad_norm": 0.8033857629693775, "learning_rate": 0.003, "loss": 4.1064, "step": 7519 }, { "epoch": 0.0752, "grad_norm": 0.6891155253835879, "learning_rate": 0.003, "loss": 4.1064, "step": 7520 }, { "epoch": 0.07521, "grad_norm": 0.6668629114515775, "learning_rate": 0.003, "loss": 4.1085, "step": 7521 }, { "epoch": 0.07522, "grad_norm": 0.6646628095819561, "learning_rate": 0.003, "loss": 4.0908, "step": 7522 }, { "epoch": 0.07523, "grad_norm": 0.6810426992617306, "learning_rate": 0.003, "loss": 4.0872, "step": 7523 }, { "epoch": 0.07524, "grad_norm": 0.5957519856201996, "learning_rate": 0.003, "loss": 4.112, "step": 7524 }, { "epoch": 0.07525, "grad_norm": 0.6304965471697975, "learning_rate": 0.003, "loss": 4.122, "step": 7525 }, { "epoch": 0.07526, "grad_norm": 0.7044075744869454, "learning_rate": 0.003, "loss": 4.1186, "step": 7526 }, { "epoch": 0.07527, "grad_norm": 0.8184433815359451, "learning_rate": 0.003, "loss": 4.1181, "step": 7527 }, { "epoch": 0.07528, "grad_norm": 0.8298479343150946, "learning_rate": 0.003, "loss": 4.1225, "step": 7528 }, { "epoch": 0.07529, "grad_norm": 0.7555105418948039, "learning_rate": 0.003, "loss": 4.0916, "step": 7529 }, { "epoch": 0.0753, "grad_norm": 0.62755290323034, "learning_rate": 0.003, "loss": 4.0864, "step": 7530 }, { "epoch": 0.07531, "grad_norm": 0.6259328988343698, "learning_rate": 0.003, "loss": 4.1175, "step": 7531 }, { "epoch": 0.07532, "grad_norm": 0.7024264501793085, "learning_rate": 0.003, "loss": 4.1066, "step": 7532 }, { "epoch": 0.07533, "grad_norm": 0.7976246816821072, "learning_rate": 0.003, "loss": 4.092, "step": 7533 }, { "epoch": 0.07534, "grad_norm": 0.9491149711829591, "learning_rate": 0.003, "loss": 4.1022, "step": 7534 }, { "epoch": 0.07535, "grad_norm": 0.9702246438073734, "learning_rate": 0.003, "loss": 4.1152, "step": 7535 }, { "epoch": 0.07536, "grad_norm": 0.8833175400088759, "learning_rate": 0.003, "loss": 4.0986, "step": 7536 }, { "epoch": 0.07537, "grad_norm": 1.0566640021173932, "learning_rate": 0.003, "loss": 4.0928, "step": 7537 }, { "epoch": 0.07538, "grad_norm": 1.0524050892778398, "learning_rate": 0.003, "loss": 4.1158, "step": 7538 }, { "epoch": 0.07539, "grad_norm": 0.9461058512204453, "learning_rate": 0.003, "loss": 4.087, "step": 7539 }, { "epoch": 0.0754, "grad_norm": 0.8671896332276945, "learning_rate": 0.003, "loss": 4.1062, "step": 7540 }, { "epoch": 0.07541, "grad_norm": 0.9370741051157787, "learning_rate": 0.003, "loss": 4.0907, "step": 7541 }, { "epoch": 0.07542, "grad_norm": 0.7655033736787794, "learning_rate": 0.003, "loss": 4.1052, "step": 7542 }, { "epoch": 0.07543, "grad_norm": 0.9638854244860205, "learning_rate": 0.003, "loss": 4.1295, "step": 7543 }, { "epoch": 0.07544, "grad_norm": 1.151245583846751, "learning_rate": 0.003, "loss": 4.1281, "step": 7544 }, { "epoch": 0.07545, "grad_norm": 1.045720500701298, "learning_rate": 0.003, "loss": 4.1496, "step": 7545 }, { "epoch": 0.07546, "grad_norm": 0.8508437267643452, "learning_rate": 0.003, "loss": 4.1, "step": 7546 }, { "epoch": 0.07547, "grad_norm": 0.8758265191573648, "learning_rate": 0.003, "loss": 4.1213, "step": 7547 }, { "epoch": 0.07548, "grad_norm": 1.0308655234346675, "learning_rate": 0.003, "loss": 4.1395, "step": 7548 }, { "epoch": 0.07549, "grad_norm": 1.1222648672525313, "learning_rate": 0.003, "loss": 4.1407, "step": 7549 }, { "epoch": 0.0755, "grad_norm": 1.0347446053481122, "learning_rate": 0.003, "loss": 4.1338, "step": 7550 }, { "epoch": 0.07551, "grad_norm": 0.8951008390390673, "learning_rate": 0.003, "loss": 4.1237, "step": 7551 }, { "epoch": 0.07552, "grad_norm": 0.867925078672574, "learning_rate": 0.003, "loss": 4.1268, "step": 7552 }, { "epoch": 0.07553, "grad_norm": 0.8525722259854441, "learning_rate": 0.003, "loss": 4.1311, "step": 7553 }, { "epoch": 0.07554, "grad_norm": 0.9114938094018381, "learning_rate": 0.003, "loss": 4.1279, "step": 7554 }, { "epoch": 0.07555, "grad_norm": 0.9344191657970983, "learning_rate": 0.003, "loss": 4.1158, "step": 7555 }, { "epoch": 0.07556, "grad_norm": 1.0145169044440256, "learning_rate": 0.003, "loss": 4.1376, "step": 7556 }, { "epoch": 0.07557, "grad_norm": 1.008268955978688, "learning_rate": 0.003, "loss": 4.1073, "step": 7557 }, { "epoch": 0.07558, "grad_norm": 0.9718709803852565, "learning_rate": 0.003, "loss": 4.1114, "step": 7558 }, { "epoch": 0.07559, "grad_norm": 1.056325136300485, "learning_rate": 0.003, "loss": 4.082, "step": 7559 }, { "epoch": 0.0756, "grad_norm": 0.9740547207080791, "learning_rate": 0.003, "loss": 4.1236, "step": 7560 }, { "epoch": 0.07561, "grad_norm": 0.9083193763007056, "learning_rate": 0.003, "loss": 4.1057, "step": 7561 }, { "epoch": 0.07562, "grad_norm": 0.8448439316271258, "learning_rate": 0.003, "loss": 4.0831, "step": 7562 }, { "epoch": 0.07563, "grad_norm": 0.9065754257807024, "learning_rate": 0.003, "loss": 4.1157, "step": 7563 }, { "epoch": 0.07564, "grad_norm": 0.8560571418992496, "learning_rate": 0.003, "loss": 4.1322, "step": 7564 }, { "epoch": 0.07565, "grad_norm": 0.8274853334246817, "learning_rate": 0.003, "loss": 4.1122, "step": 7565 }, { "epoch": 0.07566, "grad_norm": 0.8273427382041143, "learning_rate": 0.003, "loss": 4.1077, "step": 7566 }, { "epoch": 0.07567, "grad_norm": 0.8118867625373324, "learning_rate": 0.003, "loss": 4.1221, "step": 7567 }, { "epoch": 0.07568, "grad_norm": 0.6681343989273923, "learning_rate": 0.003, "loss": 4.1111, "step": 7568 }, { "epoch": 0.07569, "grad_norm": 0.5803814722946369, "learning_rate": 0.003, "loss": 4.0888, "step": 7569 }, { "epoch": 0.0757, "grad_norm": 0.5510764495582413, "learning_rate": 0.003, "loss": 4.0851, "step": 7570 }, { "epoch": 0.07571, "grad_norm": 0.6627010068288498, "learning_rate": 0.003, "loss": 4.0794, "step": 7571 }, { "epoch": 0.07572, "grad_norm": 0.6455742336359287, "learning_rate": 0.003, "loss": 4.0783, "step": 7572 }, { "epoch": 0.07573, "grad_norm": 0.7410211759586838, "learning_rate": 0.003, "loss": 4.0982, "step": 7573 }, { "epoch": 0.07574, "grad_norm": 0.7937533836416206, "learning_rate": 0.003, "loss": 4.1165, "step": 7574 }, { "epoch": 0.07575, "grad_norm": 0.7151578069937585, "learning_rate": 0.003, "loss": 4.0948, "step": 7575 }, { "epoch": 0.07576, "grad_norm": 0.6757485878022385, "learning_rate": 0.003, "loss": 4.0884, "step": 7576 }, { "epoch": 0.07577, "grad_norm": 0.7377828856053927, "learning_rate": 0.003, "loss": 4.122, "step": 7577 }, { "epoch": 0.07578, "grad_norm": 0.9295354995808791, "learning_rate": 0.003, "loss": 4.1085, "step": 7578 }, { "epoch": 0.07579, "grad_norm": 1.1308441721629587, "learning_rate": 0.003, "loss": 4.1034, "step": 7579 }, { "epoch": 0.0758, "grad_norm": 0.9537849948969392, "learning_rate": 0.003, "loss": 4.1163, "step": 7580 }, { "epoch": 0.07581, "grad_norm": 0.9648991721783183, "learning_rate": 0.003, "loss": 4.1097, "step": 7581 }, { "epoch": 0.07582, "grad_norm": 0.926845739832675, "learning_rate": 0.003, "loss": 4.1037, "step": 7582 }, { "epoch": 0.07583, "grad_norm": 0.894884269204141, "learning_rate": 0.003, "loss": 4.1021, "step": 7583 }, { "epoch": 0.07584, "grad_norm": 0.8924573565922338, "learning_rate": 0.003, "loss": 4.1477, "step": 7584 }, { "epoch": 0.07585, "grad_norm": 0.9888084614023944, "learning_rate": 0.003, "loss": 4.1151, "step": 7585 }, { "epoch": 0.07586, "grad_norm": 1.1114233056422533, "learning_rate": 0.003, "loss": 4.1001, "step": 7586 }, { "epoch": 0.07587, "grad_norm": 0.8144693201284348, "learning_rate": 0.003, "loss": 4.1327, "step": 7587 }, { "epoch": 0.07588, "grad_norm": 0.8305575966505442, "learning_rate": 0.003, "loss": 4.1078, "step": 7588 }, { "epoch": 0.07589, "grad_norm": 0.8498435104769524, "learning_rate": 0.003, "loss": 4.1152, "step": 7589 }, { "epoch": 0.0759, "grad_norm": 0.8188810092794875, "learning_rate": 0.003, "loss": 4.1095, "step": 7590 }, { "epoch": 0.07591, "grad_norm": 0.7698614390712419, "learning_rate": 0.003, "loss": 4.1173, "step": 7591 }, { "epoch": 0.07592, "grad_norm": 0.5909542261154095, "learning_rate": 0.003, "loss": 4.1137, "step": 7592 }, { "epoch": 0.07593, "grad_norm": 0.6169915126769421, "learning_rate": 0.003, "loss": 4.0775, "step": 7593 }, { "epoch": 0.07594, "grad_norm": 0.6183226824646945, "learning_rate": 0.003, "loss": 4.0821, "step": 7594 }, { "epoch": 0.07595, "grad_norm": 0.6295258139827735, "learning_rate": 0.003, "loss": 4.0829, "step": 7595 }, { "epoch": 0.07596, "grad_norm": 0.5647538604253549, "learning_rate": 0.003, "loss": 4.0732, "step": 7596 }, { "epoch": 0.07597, "grad_norm": 0.5488719464323173, "learning_rate": 0.003, "loss": 4.1142, "step": 7597 }, { "epoch": 0.07598, "grad_norm": 0.5190318492649775, "learning_rate": 0.003, "loss": 4.1075, "step": 7598 }, { "epoch": 0.07599, "grad_norm": 0.5944308410673468, "learning_rate": 0.003, "loss": 4.1082, "step": 7599 }, { "epoch": 0.076, "grad_norm": 0.6645839697886495, "learning_rate": 0.003, "loss": 4.0989, "step": 7600 }, { "epoch": 0.07601, "grad_norm": 0.9191079894373272, "learning_rate": 0.003, "loss": 4.1219, "step": 7601 }, { "epoch": 0.07602, "grad_norm": 1.2588970299218996, "learning_rate": 0.003, "loss": 4.1049, "step": 7602 }, { "epoch": 0.07603, "grad_norm": 0.9641328600344435, "learning_rate": 0.003, "loss": 4.1006, "step": 7603 }, { "epoch": 0.07604, "grad_norm": 1.0002475506914794, "learning_rate": 0.003, "loss": 4.1197, "step": 7604 }, { "epoch": 0.07605, "grad_norm": 0.9041839404022576, "learning_rate": 0.003, "loss": 4.11, "step": 7605 }, { "epoch": 0.07606, "grad_norm": 0.8495943134663567, "learning_rate": 0.003, "loss": 4.1653, "step": 7606 }, { "epoch": 0.07607, "grad_norm": 0.800744853771563, "learning_rate": 0.003, "loss": 4.1199, "step": 7607 }, { "epoch": 0.07608, "grad_norm": 0.7315846867215643, "learning_rate": 0.003, "loss": 4.13, "step": 7608 }, { "epoch": 0.07609, "grad_norm": 0.6443317008016733, "learning_rate": 0.003, "loss": 4.1027, "step": 7609 }, { "epoch": 0.0761, "grad_norm": 0.6907982352452112, "learning_rate": 0.003, "loss": 4.098, "step": 7610 }, { "epoch": 0.07611, "grad_norm": 0.857723111168813, "learning_rate": 0.003, "loss": 4.1294, "step": 7611 }, { "epoch": 0.07612, "grad_norm": 1.0992578425673403, "learning_rate": 0.003, "loss": 4.1009, "step": 7612 }, { "epoch": 0.07613, "grad_norm": 0.9526866939763878, "learning_rate": 0.003, "loss": 4.0971, "step": 7613 }, { "epoch": 0.07614, "grad_norm": 0.7803177657645249, "learning_rate": 0.003, "loss": 4.0788, "step": 7614 }, { "epoch": 0.07615, "grad_norm": 0.7081390563241489, "learning_rate": 0.003, "loss": 4.1014, "step": 7615 }, { "epoch": 0.07616, "grad_norm": 0.7252993228592656, "learning_rate": 0.003, "loss": 4.1019, "step": 7616 }, { "epoch": 0.07617, "grad_norm": 0.7475989787312242, "learning_rate": 0.003, "loss": 4.0668, "step": 7617 }, { "epoch": 0.07618, "grad_norm": 0.7573711078639473, "learning_rate": 0.003, "loss": 4.1336, "step": 7618 }, { "epoch": 0.07619, "grad_norm": 1.0541262061063688, "learning_rate": 0.003, "loss": 4.1161, "step": 7619 }, { "epoch": 0.0762, "grad_norm": 1.1895056232233716, "learning_rate": 0.003, "loss": 4.0943, "step": 7620 }, { "epoch": 0.07621, "grad_norm": 0.8742054868789058, "learning_rate": 0.003, "loss": 4.1209, "step": 7621 }, { "epoch": 0.07622, "grad_norm": 0.8444291423398331, "learning_rate": 0.003, "loss": 4.1195, "step": 7622 }, { "epoch": 0.07623, "grad_norm": 0.8922019849127748, "learning_rate": 0.003, "loss": 4.1048, "step": 7623 }, { "epoch": 0.07624, "grad_norm": 0.803083822831012, "learning_rate": 0.003, "loss": 4.1052, "step": 7624 }, { "epoch": 0.07625, "grad_norm": 0.7385150275748038, "learning_rate": 0.003, "loss": 4.0924, "step": 7625 }, { "epoch": 0.07626, "grad_norm": 0.6711142915115071, "learning_rate": 0.003, "loss": 4.1038, "step": 7626 }, { "epoch": 0.07627, "grad_norm": 0.812731441964542, "learning_rate": 0.003, "loss": 4.1105, "step": 7627 }, { "epoch": 0.07628, "grad_norm": 0.8485441320364617, "learning_rate": 0.003, "loss": 4.0814, "step": 7628 }, { "epoch": 0.07629, "grad_norm": 0.8442461477013375, "learning_rate": 0.003, "loss": 4.1021, "step": 7629 }, { "epoch": 0.0763, "grad_norm": 0.8784878480520506, "learning_rate": 0.003, "loss": 4.0907, "step": 7630 }, { "epoch": 0.07631, "grad_norm": 0.8831785338162262, "learning_rate": 0.003, "loss": 4.094, "step": 7631 }, { "epoch": 0.07632, "grad_norm": 0.8267222772195404, "learning_rate": 0.003, "loss": 4.0843, "step": 7632 }, { "epoch": 0.07633, "grad_norm": 0.8474903779702966, "learning_rate": 0.003, "loss": 4.0926, "step": 7633 }, { "epoch": 0.07634, "grad_norm": 0.8527662645558675, "learning_rate": 0.003, "loss": 4.0845, "step": 7634 }, { "epoch": 0.07635, "grad_norm": 0.7539827705409249, "learning_rate": 0.003, "loss": 4.1066, "step": 7635 }, { "epoch": 0.07636, "grad_norm": 0.7979423225524935, "learning_rate": 0.003, "loss": 4.0814, "step": 7636 }, { "epoch": 0.07637, "grad_norm": 0.9896367410513814, "learning_rate": 0.003, "loss": 4.1276, "step": 7637 }, { "epoch": 0.07638, "grad_norm": 1.23693930151839, "learning_rate": 0.003, "loss": 4.1123, "step": 7638 }, { "epoch": 0.07639, "grad_norm": 0.8951052515565378, "learning_rate": 0.003, "loss": 4.1123, "step": 7639 }, { "epoch": 0.0764, "grad_norm": 0.9661295603094655, "learning_rate": 0.003, "loss": 4.1432, "step": 7640 }, { "epoch": 0.07641, "grad_norm": 1.0381140300663276, "learning_rate": 0.003, "loss": 4.1436, "step": 7641 }, { "epoch": 0.07642, "grad_norm": 0.8657178028651203, "learning_rate": 0.003, "loss": 4.132, "step": 7642 }, { "epoch": 0.07643, "grad_norm": 0.7585312382956262, "learning_rate": 0.003, "loss": 4.1041, "step": 7643 }, { "epoch": 0.07644, "grad_norm": 0.8424021918604633, "learning_rate": 0.003, "loss": 4.0905, "step": 7644 }, { "epoch": 0.07645, "grad_norm": 1.0547127355883397, "learning_rate": 0.003, "loss": 4.114, "step": 7645 }, { "epoch": 0.07646, "grad_norm": 1.0501940720210954, "learning_rate": 0.003, "loss": 4.1182, "step": 7646 }, { "epoch": 0.07647, "grad_norm": 1.0089732479233036, "learning_rate": 0.003, "loss": 4.0995, "step": 7647 }, { "epoch": 0.07648, "grad_norm": 0.9700747028421894, "learning_rate": 0.003, "loss": 4.1172, "step": 7648 }, { "epoch": 0.07649, "grad_norm": 0.8831977035938575, "learning_rate": 0.003, "loss": 4.1133, "step": 7649 }, { "epoch": 0.0765, "grad_norm": 0.7985816216412632, "learning_rate": 0.003, "loss": 4.0975, "step": 7650 }, { "epoch": 0.07651, "grad_norm": 0.7812576551419401, "learning_rate": 0.003, "loss": 4.1046, "step": 7651 }, { "epoch": 0.07652, "grad_norm": 0.863619677787746, "learning_rate": 0.003, "loss": 4.1032, "step": 7652 }, { "epoch": 0.07653, "grad_norm": 1.033937451094598, "learning_rate": 0.003, "loss": 4.088, "step": 7653 }, { "epoch": 0.07654, "grad_norm": 1.0055330888443008, "learning_rate": 0.003, "loss": 4.1136, "step": 7654 }, { "epoch": 0.07655, "grad_norm": 0.961819876357498, "learning_rate": 0.003, "loss": 4.1176, "step": 7655 }, { "epoch": 0.07656, "grad_norm": 0.8931684820162469, "learning_rate": 0.003, "loss": 4.1241, "step": 7656 }, { "epoch": 0.07657, "grad_norm": 0.8072740880367048, "learning_rate": 0.003, "loss": 4.1257, "step": 7657 }, { "epoch": 0.07658, "grad_norm": 0.8027134754140552, "learning_rate": 0.003, "loss": 4.1072, "step": 7658 }, { "epoch": 0.07659, "grad_norm": 0.8125620251732755, "learning_rate": 0.003, "loss": 4.1028, "step": 7659 }, { "epoch": 0.0766, "grad_norm": 0.7220469429506398, "learning_rate": 0.003, "loss": 4.14, "step": 7660 }, { "epoch": 0.07661, "grad_norm": 0.715054421826916, "learning_rate": 0.003, "loss": 4.0987, "step": 7661 }, { "epoch": 0.07662, "grad_norm": 0.7493362311346902, "learning_rate": 0.003, "loss": 4.1261, "step": 7662 }, { "epoch": 0.07663, "grad_norm": 0.8714009180373697, "learning_rate": 0.003, "loss": 4.1258, "step": 7663 }, { "epoch": 0.07664, "grad_norm": 0.9901602291385457, "learning_rate": 0.003, "loss": 4.1237, "step": 7664 }, { "epoch": 0.07665, "grad_norm": 1.0074073013289961, "learning_rate": 0.003, "loss": 4.0996, "step": 7665 }, { "epoch": 0.07666, "grad_norm": 0.953985834092227, "learning_rate": 0.003, "loss": 4.1152, "step": 7666 }, { "epoch": 0.07667, "grad_norm": 0.9324987692217996, "learning_rate": 0.003, "loss": 4.1243, "step": 7667 }, { "epoch": 0.07668, "grad_norm": 0.9181312329680767, "learning_rate": 0.003, "loss": 4.1146, "step": 7668 }, { "epoch": 0.07669, "grad_norm": 1.0636826877445138, "learning_rate": 0.003, "loss": 4.1336, "step": 7669 }, { "epoch": 0.0767, "grad_norm": 1.1446064632763406, "learning_rate": 0.003, "loss": 4.101, "step": 7670 }, { "epoch": 0.07671, "grad_norm": 1.1178580340464093, "learning_rate": 0.003, "loss": 4.138, "step": 7671 }, { "epoch": 0.07672, "grad_norm": 1.0178300133444778, "learning_rate": 0.003, "loss": 4.116, "step": 7672 }, { "epoch": 0.07673, "grad_norm": 1.2016623636923163, "learning_rate": 0.003, "loss": 4.151, "step": 7673 }, { "epoch": 0.07674, "grad_norm": 0.8240382566249935, "learning_rate": 0.003, "loss": 4.1123, "step": 7674 }, { "epoch": 0.07675, "grad_norm": 0.8028023964885046, "learning_rate": 0.003, "loss": 4.1011, "step": 7675 }, { "epoch": 0.07676, "grad_norm": 0.8231955050487717, "learning_rate": 0.003, "loss": 4.1009, "step": 7676 }, { "epoch": 0.07677, "grad_norm": 0.7115485042666164, "learning_rate": 0.003, "loss": 4.0987, "step": 7677 }, { "epoch": 0.07678, "grad_norm": 0.6936604353261299, "learning_rate": 0.003, "loss": 4.1315, "step": 7678 }, { "epoch": 0.07679, "grad_norm": 0.8220436974564044, "learning_rate": 0.003, "loss": 4.1265, "step": 7679 }, { "epoch": 0.0768, "grad_norm": 0.8824999345056386, "learning_rate": 0.003, "loss": 4.0991, "step": 7680 }, { "epoch": 0.07681, "grad_norm": 1.074897714216047, "learning_rate": 0.003, "loss": 4.1173, "step": 7681 }, { "epoch": 0.07682, "grad_norm": 1.1253938246986106, "learning_rate": 0.003, "loss": 4.106, "step": 7682 }, { "epoch": 0.07683, "grad_norm": 0.8445187360108007, "learning_rate": 0.003, "loss": 4.1142, "step": 7683 }, { "epoch": 0.07684, "grad_norm": 0.6711564131616432, "learning_rate": 0.003, "loss": 4.1229, "step": 7684 }, { "epoch": 0.07685, "grad_norm": 0.8607141168125007, "learning_rate": 0.003, "loss": 4.1041, "step": 7685 }, { "epoch": 0.07686, "grad_norm": 0.990613718786834, "learning_rate": 0.003, "loss": 4.1055, "step": 7686 }, { "epoch": 0.07687, "grad_norm": 1.2572290057789604, "learning_rate": 0.003, "loss": 4.1289, "step": 7687 }, { "epoch": 0.07688, "grad_norm": 0.8215779647578441, "learning_rate": 0.003, "loss": 4.1125, "step": 7688 }, { "epoch": 0.07689, "grad_norm": 0.6891001411750275, "learning_rate": 0.003, "loss": 4.0926, "step": 7689 }, { "epoch": 0.0769, "grad_norm": 0.6166117212029528, "learning_rate": 0.003, "loss": 4.1167, "step": 7690 }, { "epoch": 0.07691, "grad_norm": 0.5677700647993942, "learning_rate": 0.003, "loss": 4.1085, "step": 7691 }, { "epoch": 0.07692, "grad_norm": 0.7724897932547341, "learning_rate": 0.003, "loss": 4.1195, "step": 7692 }, { "epoch": 0.07693, "grad_norm": 1.0012551819046691, "learning_rate": 0.003, "loss": 4.0968, "step": 7693 }, { "epoch": 0.07694, "grad_norm": 1.1907585124420144, "learning_rate": 0.003, "loss": 4.1061, "step": 7694 }, { "epoch": 0.07695, "grad_norm": 0.7010072537738787, "learning_rate": 0.003, "loss": 4.1189, "step": 7695 }, { "epoch": 0.07696, "grad_norm": 0.6208430723305157, "learning_rate": 0.003, "loss": 4.0526, "step": 7696 }, { "epoch": 0.07697, "grad_norm": 0.6196777653103698, "learning_rate": 0.003, "loss": 4.134, "step": 7697 }, { "epoch": 0.07698, "grad_norm": 0.700128726476087, "learning_rate": 0.003, "loss": 4.1068, "step": 7698 }, { "epoch": 0.07699, "grad_norm": 0.6686751131176947, "learning_rate": 0.003, "loss": 4.1315, "step": 7699 }, { "epoch": 0.077, "grad_norm": 0.7342255589833349, "learning_rate": 0.003, "loss": 4.1056, "step": 7700 }, { "epoch": 0.07701, "grad_norm": 0.756854445847, "learning_rate": 0.003, "loss": 4.0834, "step": 7701 }, { "epoch": 0.07702, "grad_norm": 0.88266150116856, "learning_rate": 0.003, "loss": 4.0866, "step": 7702 }, { "epoch": 0.07703, "grad_norm": 0.9388488634222688, "learning_rate": 0.003, "loss": 4.1075, "step": 7703 }, { "epoch": 0.07704, "grad_norm": 0.8712150471498338, "learning_rate": 0.003, "loss": 4.1084, "step": 7704 }, { "epoch": 0.07705, "grad_norm": 0.7905979661669048, "learning_rate": 0.003, "loss": 4.0911, "step": 7705 }, { "epoch": 0.07706, "grad_norm": 0.8554435529838232, "learning_rate": 0.003, "loss": 4.1148, "step": 7706 }, { "epoch": 0.07707, "grad_norm": 0.950211545207407, "learning_rate": 0.003, "loss": 4.1114, "step": 7707 }, { "epoch": 0.07708, "grad_norm": 0.9941562838099525, "learning_rate": 0.003, "loss": 4.1271, "step": 7708 }, { "epoch": 0.07709, "grad_norm": 0.9405303791524773, "learning_rate": 0.003, "loss": 4.1115, "step": 7709 }, { "epoch": 0.0771, "grad_norm": 0.8363759541957757, "learning_rate": 0.003, "loss": 4.0856, "step": 7710 }, { "epoch": 0.07711, "grad_norm": 0.733875954007421, "learning_rate": 0.003, "loss": 4.0902, "step": 7711 }, { "epoch": 0.07712, "grad_norm": 0.6560253796119527, "learning_rate": 0.003, "loss": 4.0968, "step": 7712 }, { "epoch": 0.07713, "grad_norm": 0.6759517680276719, "learning_rate": 0.003, "loss": 4.0718, "step": 7713 }, { "epoch": 0.07714, "grad_norm": 1.0143647874314274, "learning_rate": 0.003, "loss": 4.0756, "step": 7714 }, { "epoch": 0.07715, "grad_norm": 1.232675077798191, "learning_rate": 0.003, "loss": 4.1085, "step": 7715 }, { "epoch": 0.07716, "grad_norm": 0.8270400357958588, "learning_rate": 0.003, "loss": 4.0902, "step": 7716 }, { "epoch": 0.07717, "grad_norm": 0.7208914160909232, "learning_rate": 0.003, "loss": 4.0834, "step": 7717 }, { "epoch": 0.07718, "grad_norm": 0.7014829688376748, "learning_rate": 0.003, "loss": 4.1251, "step": 7718 }, { "epoch": 0.07719, "grad_norm": 0.7014046527951289, "learning_rate": 0.003, "loss": 4.0924, "step": 7719 }, { "epoch": 0.0772, "grad_norm": 0.746499130996974, "learning_rate": 0.003, "loss": 4.1248, "step": 7720 }, { "epoch": 0.07721, "grad_norm": 0.7004391345875081, "learning_rate": 0.003, "loss": 4.0829, "step": 7721 }, { "epoch": 0.07722, "grad_norm": 0.8408283424603382, "learning_rate": 0.003, "loss": 4.1201, "step": 7722 }, { "epoch": 0.07723, "grad_norm": 1.110388676041958, "learning_rate": 0.003, "loss": 4.1266, "step": 7723 }, { "epoch": 0.07724, "grad_norm": 0.9338754060953655, "learning_rate": 0.003, "loss": 4.1047, "step": 7724 }, { "epoch": 0.07725, "grad_norm": 0.8624819322150851, "learning_rate": 0.003, "loss": 4.0946, "step": 7725 }, { "epoch": 0.07726, "grad_norm": 0.8365430711688485, "learning_rate": 0.003, "loss": 4.1189, "step": 7726 }, { "epoch": 0.07727, "grad_norm": 0.8249395305754468, "learning_rate": 0.003, "loss": 4.0822, "step": 7727 }, { "epoch": 0.07728, "grad_norm": 0.7844147351205248, "learning_rate": 0.003, "loss": 4.0894, "step": 7728 }, { "epoch": 0.07729, "grad_norm": 0.7973591326922866, "learning_rate": 0.003, "loss": 4.1152, "step": 7729 }, { "epoch": 0.0773, "grad_norm": 0.7225338885856214, "learning_rate": 0.003, "loss": 4.1046, "step": 7730 }, { "epoch": 0.07731, "grad_norm": 0.73397785517986, "learning_rate": 0.003, "loss": 4.0953, "step": 7731 }, { "epoch": 0.07732, "grad_norm": 0.7538960206734072, "learning_rate": 0.003, "loss": 4.1068, "step": 7732 }, { "epoch": 0.07733, "grad_norm": 0.9299182384721361, "learning_rate": 0.003, "loss": 4.1229, "step": 7733 }, { "epoch": 0.07734, "grad_norm": 1.1092844376628306, "learning_rate": 0.003, "loss": 4.1306, "step": 7734 }, { "epoch": 0.07735, "grad_norm": 0.9361685548610317, "learning_rate": 0.003, "loss": 4.0998, "step": 7735 }, { "epoch": 0.07736, "grad_norm": 0.8101474819136633, "learning_rate": 0.003, "loss": 4.119, "step": 7736 }, { "epoch": 0.07737, "grad_norm": 0.8203573852291894, "learning_rate": 0.003, "loss": 4.0808, "step": 7737 }, { "epoch": 0.07738, "grad_norm": 1.169891214994567, "learning_rate": 0.003, "loss": 4.1043, "step": 7738 }, { "epoch": 0.07739, "grad_norm": 1.2019762033842287, "learning_rate": 0.003, "loss": 4.1176, "step": 7739 }, { "epoch": 0.0774, "grad_norm": 0.9191096276408175, "learning_rate": 0.003, "loss": 4.0938, "step": 7740 }, { "epoch": 0.07741, "grad_norm": 0.8019281817921025, "learning_rate": 0.003, "loss": 4.0951, "step": 7741 }, { "epoch": 0.07742, "grad_norm": 0.9593283919942299, "learning_rate": 0.003, "loss": 4.0909, "step": 7742 }, { "epoch": 0.07743, "grad_norm": 1.0190652900336852, "learning_rate": 0.003, "loss": 4.112, "step": 7743 }, { "epoch": 0.07744, "grad_norm": 1.1716932492490297, "learning_rate": 0.003, "loss": 4.1179, "step": 7744 }, { "epoch": 0.07745, "grad_norm": 1.0937600539399255, "learning_rate": 0.003, "loss": 4.1167, "step": 7745 }, { "epoch": 0.07746, "grad_norm": 0.9602047963501511, "learning_rate": 0.003, "loss": 4.1159, "step": 7746 }, { "epoch": 0.07747, "grad_norm": 0.7943597561692926, "learning_rate": 0.003, "loss": 4.1141, "step": 7747 }, { "epoch": 0.07748, "grad_norm": 0.7757213618554147, "learning_rate": 0.003, "loss": 4.1393, "step": 7748 }, { "epoch": 0.07749, "grad_norm": 0.791302635220645, "learning_rate": 0.003, "loss": 4.132, "step": 7749 }, { "epoch": 0.0775, "grad_norm": 0.794756302153131, "learning_rate": 0.003, "loss": 4.0899, "step": 7750 }, { "epoch": 0.07751, "grad_norm": 0.8515667360808754, "learning_rate": 0.003, "loss": 4.105, "step": 7751 }, { "epoch": 0.07752, "grad_norm": 0.8271299496217495, "learning_rate": 0.003, "loss": 4.1247, "step": 7752 }, { "epoch": 0.07753, "grad_norm": 0.8707061104805746, "learning_rate": 0.003, "loss": 4.0965, "step": 7753 }, { "epoch": 0.07754, "grad_norm": 0.8698105492883605, "learning_rate": 0.003, "loss": 4.1031, "step": 7754 }, { "epoch": 0.07755, "grad_norm": 0.9757058824840338, "learning_rate": 0.003, "loss": 4.1107, "step": 7755 }, { "epoch": 0.07756, "grad_norm": 1.0996218275953191, "learning_rate": 0.003, "loss": 4.1267, "step": 7756 }, { "epoch": 0.07757, "grad_norm": 1.0046296348538857, "learning_rate": 0.003, "loss": 4.0942, "step": 7757 }, { "epoch": 0.07758, "grad_norm": 1.088474067480404, "learning_rate": 0.003, "loss": 4.0957, "step": 7758 }, { "epoch": 0.07759, "grad_norm": 0.6838363547835872, "learning_rate": 0.003, "loss": 4.0978, "step": 7759 }, { "epoch": 0.0776, "grad_norm": 0.6387940327331244, "learning_rate": 0.003, "loss": 4.0755, "step": 7760 }, { "epoch": 0.07761, "grad_norm": 0.6801202292166391, "learning_rate": 0.003, "loss": 4.1163, "step": 7761 }, { "epoch": 0.07762, "grad_norm": 0.7524204138228072, "learning_rate": 0.003, "loss": 4.0995, "step": 7762 }, { "epoch": 0.07763, "grad_norm": 0.8422682117877314, "learning_rate": 0.003, "loss": 4.1163, "step": 7763 }, { "epoch": 0.07764, "grad_norm": 0.9464082829191608, "learning_rate": 0.003, "loss": 4.1254, "step": 7764 }, { "epoch": 0.07765, "grad_norm": 1.0400578267063363, "learning_rate": 0.003, "loss": 4.1103, "step": 7765 }, { "epoch": 0.07766, "grad_norm": 0.8570240484946164, "learning_rate": 0.003, "loss": 4.0896, "step": 7766 }, { "epoch": 0.07767, "grad_norm": 0.7886939048717178, "learning_rate": 0.003, "loss": 4.0876, "step": 7767 }, { "epoch": 0.07768, "grad_norm": 0.7327426017118384, "learning_rate": 0.003, "loss": 4.0971, "step": 7768 }, { "epoch": 0.07769, "grad_norm": 0.7224926065792775, "learning_rate": 0.003, "loss": 4.102, "step": 7769 }, { "epoch": 0.0777, "grad_norm": 0.6812310474755818, "learning_rate": 0.003, "loss": 4.1155, "step": 7770 }, { "epoch": 0.07771, "grad_norm": 0.6297454383181756, "learning_rate": 0.003, "loss": 4.1123, "step": 7771 }, { "epoch": 0.07772, "grad_norm": 0.727035568405193, "learning_rate": 0.003, "loss": 4.1049, "step": 7772 }, { "epoch": 0.07773, "grad_norm": 0.7440750703020748, "learning_rate": 0.003, "loss": 4.0751, "step": 7773 }, { "epoch": 0.07774, "grad_norm": 0.6795627443083461, "learning_rate": 0.003, "loss": 4.1006, "step": 7774 }, { "epoch": 0.07775, "grad_norm": 0.6784508298159274, "learning_rate": 0.003, "loss": 4.1091, "step": 7775 }, { "epoch": 0.07776, "grad_norm": 0.6385790918136247, "learning_rate": 0.003, "loss": 4.0967, "step": 7776 }, { "epoch": 0.07777, "grad_norm": 0.6872009152821507, "learning_rate": 0.003, "loss": 4.1109, "step": 7777 }, { "epoch": 0.07778, "grad_norm": 0.6675405617900448, "learning_rate": 0.003, "loss": 4.1189, "step": 7778 }, { "epoch": 0.07779, "grad_norm": 0.7357317176753875, "learning_rate": 0.003, "loss": 4.0785, "step": 7779 }, { "epoch": 0.0778, "grad_norm": 0.8022677370632643, "learning_rate": 0.003, "loss": 4.0774, "step": 7780 }, { "epoch": 0.07781, "grad_norm": 0.9434523581478621, "learning_rate": 0.003, "loss": 4.0682, "step": 7781 }, { "epoch": 0.07782, "grad_norm": 1.5623982009752648, "learning_rate": 0.003, "loss": 4.0998, "step": 7782 }, { "epoch": 0.07783, "grad_norm": 0.7044738718569362, "learning_rate": 0.003, "loss": 4.0939, "step": 7783 }, { "epoch": 0.07784, "grad_norm": 0.7245143997153488, "learning_rate": 0.003, "loss": 4.0936, "step": 7784 }, { "epoch": 0.07785, "grad_norm": 0.716698916381954, "learning_rate": 0.003, "loss": 4.1197, "step": 7785 }, { "epoch": 0.07786, "grad_norm": 0.6921187171985951, "learning_rate": 0.003, "loss": 4.0837, "step": 7786 }, { "epoch": 0.07787, "grad_norm": 0.6045099384961314, "learning_rate": 0.003, "loss": 4.0682, "step": 7787 }, { "epoch": 0.07788, "grad_norm": 0.6083002796628949, "learning_rate": 0.003, "loss": 4.0855, "step": 7788 }, { "epoch": 0.07789, "grad_norm": 0.6450261280850079, "learning_rate": 0.003, "loss": 4.0698, "step": 7789 }, { "epoch": 0.0779, "grad_norm": 0.7723456314060048, "learning_rate": 0.003, "loss": 4.1255, "step": 7790 }, { "epoch": 0.07791, "grad_norm": 0.9626934092811175, "learning_rate": 0.003, "loss": 4.0936, "step": 7791 }, { "epoch": 0.07792, "grad_norm": 1.0213202281359437, "learning_rate": 0.003, "loss": 4.0826, "step": 7792 }, { "epoch": 0.07793, "grad_norm": 0.9823991856862937, "learning_rate": 0.003, "loss": 4.0858, "step": 7793 }, { "epoch": 0.07794, "grad_norm": 1.0324749912970457, "learning_rate": 0.003, "loss": 4.1365, "step": 7794 }, { "epoch": 0.07795, "grad_norm": 1.2976167109237968, "learning_rate": 0.003, "loss": 4.0864, "step": 7795 }, { "epoch": 0.07796, "grad_norm": 0.8902788450024939, "learning_rate": 0.003, "loss": 4.0992, "step": 7796 }, { "epoch": 0.07797, "grad_norm": 0.9291964989058917, "learning_rate": 0.003, "loss": 4.0915, "step": 7797 }, { "epoch": 0.07798, "grad_norm": 0.9627682686730562, "learning_rate": 0.003, "loss": 4.132, "step": 7798 }, { "epoch": 0.07799, "grad_norm": 1.1456962518858615, "learning_rate": 0.003, "loss": 4.1401, "step": 7799 }, { "epoch": 0.078, "grad_norm": 1.0251765436245048, "learning_rate": 0.003, "loss": 4.0986, "step": 7800 }, { "epoch": 0.07801, "grad_norm": 0.9973687128151619, "learning_rate": 0.003, "loss": 4.1117, "step": 7801 }, { "epoch": 0.07802, "grad_norm": 0.948086135583704, "learning_rate": 0.003, "loss": 4.0913, "step": 7802 }, { "epoch": 0.07803, "grad_norm": 1.1277990578479882, "learning_rate": 0.003, "loss": 4.1049, "step": 7803 }, { "epoch": 0.07804, "grad_norm": 0.9774623897022733, "learning_rate": 0.003, "loss": 4.1292, "step": 7804 }, { "epoch": 0.07805, "grad_norm": 0.8320985331874762, "learning_rate": 0.003, "loss": 4.1153, "step": 7805 }, { "epoch": 0.07806, "grad_norm": 0.8657196694590319, "learning_rate": 0.003, "loss": 4.1292, "step": 7806 }, { "epoch": 0.07807, "grad_norm": 0.9492463229383535, "learning_rate": 0.003, "loss": 4.1325, "step": 7807 }, { "epoch": 0.07808, "grad_norm": 1.0394516274093006, "learning_rate": 0.003, "loss": 4.1039, "step": 7808 }, { "epoch": 0.07809, "grad_norm": 0.9335393900367771, "learning_rate": 0.003, "loss": 4.1157, "step": 7809 }, { "epoch": 0.0781, "grad_norm": 0.9443482076739536, "learning_rate": 0.003, "loss": 4.0987, "step": 7810 }, { "epoch": 0.07811, "grad_norm": 0.9123874992413225, "learning_rate": 0.003, "loss": 4.1105, "step": 7811 }, { "epoch": 0.07812, "grad_norm": 1.1264587870913032, "learning_rate": 0.003, "loss": 4.1125, "step": 7812 }, { "epoch": 0.07813, "grad_norm": 1.1305952516582176, "learning_rate": 0.003, "loss": 4.1298, "step": 7813 }, { "epoch": 0.07814, "grad_norm": 0.8414805101214488, "learning_rate": 0.003, "loss": 4.1511, "step": 7814 }, { "epoch": 0.07815, "grad_norm": 0.9502914015578153, "learning_rate": 0.003, "loss": 4.1164, "step": 7815 }, { "epoch": 0.07816, "grad_norm": 1.1760246778581924, "learning_rate": 0.003, "loss": 4.1092, "step": 7816 }, { "epoch": 0.07817, "grad_norm": 0.9408621866890481, "learning_rate": 0.003, "loss": 4.1087, "step": 7817 }, { "epoch": 0.07818, "grad_norm": 0.8296428274925994, "learning_rate": 0.003, "loss": 4.0989, "step": 7818 }, { "epoch": 0.07819, "grad_norm": 0.7920801052079187, "learning_rate": 0.003, "loss": 4.0843, "step": 7819 }, { "epoch": 0.0782, "grad_norm": 0.8823722442311102, "learning_rate": 0.003, "loss": 4.1285, "step": 7820 }, { "epoch": 0.07821, "grad_norm": 0.9516339518218203, "learning_rate": 0.003, "loss": 4.0937, "step": 7821 }, { "epoch": 0.07822, "grad_norm": 1.0547110747707245, "learning_rate": 0.003, "loss": 4.0876, "step": 7822 }, { "epoch": 0.07823, "grad_norm": 0.989883022467342, "learning_rate": 0.003, "loss": 4.1007, "step": 7823 }, { "epoch": 0.07824, "grad_norm": 0.8914027725354553, "learning_rate": 0.003, "loss": 4.131, "step": 7824 }, { "epoch": 0.07825, "grad_norm": 0.8076840807799915, "learning_rate": 0.003, "loss": 4.0934, "step": 7825 }, { "epoch": 0.07826, "grad_norm": 0.7415708695700829, "learning_rate": 0.003, "loss": 4.0826, "step": 7826 }, { "epoch": 0.07827, "grad_norm": 0.7214257388849613, "learning_rate": 0.003, "loss": 4.1111, "step": 7827 }, { "epoch": 0.07828, "grad_norm": 0.8136232450263843, "learning_rate": 0.003, "loss": 4.1138, "step": 7828 }, { "epoch": 0.07829, "grad_norm": 0.9199584413946614, "learning_rate": 0.003, "loss": 4.1221, "step": 7829 }, { "epoch": 0.0783, "grad_norm": 0.8115857466773752, "learning_rate": 0.003, "loss": 4.0837, "step": 7830 }, { "epoch": 0.07831, "grad_norm": 0.7591405213641907, "learning_rate": 0.003, "loss": 4.0935, "step": 7831 }, { "epoch": 0.07832, "grad_norm": 0.6490760526643984, "learning_rate": 0.003, "loss": 4.0997, "step": 7832 }, { "epoch": 0.07833, "grad_norm": 0.703850585915276, "learning_rate": 0.003, "loss": 4.099, "step": 7833 }, { "epoch": 0.07834, "grad_norm": 0.8284668702922542, "learning_rate": 0.003, "loss": 4.1513, "step": 7834 }, { "epoch": 0.07835, "grad_norm": 0.8475382307293997, "learning_rate": 0.003, "loss": 4.1145, "step": 7835 }, { "epoch": 0.07836, "grad_norm": 0.800674520894362, "learning_rate": 0.003, "loss": 4.0968, "step": 7836 }, { "epoch": 0.07837, "grad_norm": 0.7804816932384945, "learning_rate": 0.003, "loss": 4.107, "step": 7837 }, { "epoch": 0.07838, "grad_norm": 0.7719832162406756, "learning_rate": 0.003, "loss": 4.0638, "step": 7838 }, { "epoch": 0.07839, "grad_norm": 0.8103102203464976, "learning_rate": 0.003, "loss": 4.0871, "step": 7839 }, { "epoch": 0.0784, "grad_norm": 0.847847320739481, "learning_rate": 0.003, "loss": 4.1155, "step": 7840 }, { "epoch": 0.07841, "grad_norm": 0.9217204569862923, "learning_rate": 0.003, "loss": 4.0931, "step": 7841 }, { "epoch": 0.07842, "grad_norm": 1.0755759756691639, "learning_rate": 0.003, "loss": 4.0903, "step": 7842 }, { "epoch": 0.07843, "grad_norm": 1.0372058631482444, "learning_rate": 0.003, "loss": 4.1073, "step": 7843 }, { "epoch": 0.07844, "grad_norm": 0.8852971276699896, "learning_rate": 0.003, "loss": 4.1061, "step": 7844 }, { "epoch": 0.07845, "grad_norm": 0.7984782327828842, "learning_rate": 0.003, "loss": 4.1157, "step": 7845 }, { "epoch": 0.07846, "grad_norm": 0.8806483710377667, "learning_rate": 0.003, "loss": 4.1337, "step": 7846 }, { "epoch": 0.07847, "grad_norm": 1.0378492921152178, "learning_rate": 0.003, "loss": 4.0843, "step": 7847 }, { "epoch": 0.07848, "grad_norm": 0.895360780588699, "learning_rate": 0.003, "loss": 4.1303, "step": 7848 }, { "epoch": 0.07849, "grad_norm": 0.8289639586531552, "learning_rate": 0.003, "loss": 4.1355, "step": 7849 }, { "epoch": 0.0785, "grad_norm": 0.8305063229582818, "learning_rate": 0.003, "loss": 4.1039, "step": 7850 }, { "epoch": 0.07851, "grad_norm": 0.7058129266076502, "learning_rate": 0.003, "loss": 4.0919, "step": 7851 }, { "epoch": 0.07852, "grad_norm": 0.6418192413394789, "learning_rate": 0.003, "loss": 4.1283, "step": 7852 }, { "epoch": 0.07853, "grad_norm": 0.7531346386515196, "learning_rate": 0.003, "loss": 4.0957, "step": 7853 }, { "epoch": 0.07854, "grad_norm": 0.8398583249558569, "learning_rate": 0.003, "loss": 4.1222, "step": 7854 }, { "epoch": 0.07855, "grad_norm": 0.7774066949599959, "learning_rate": 0.003, "loss": 4.1053, "step": 7855 }, { "epoch": 0.07856, "grad_norm": 1.0200945417324516, "learning_rate": 0.003, "loss": 4.1135, "step": 7856 }, { "epoch": 0.07857, "grad_norm": 1.075009695811694, "learning_rate": 0.003, "loss": 4.1381, "step": 7857 }, { "epoch": 0.07858, "grad_norm": 0.7910404278344594, "learning_rate": 0.003, "loss": 4.1217, "step": 7858 }, { "epoch": 0.07859, "grad_norm": 0.7300829664682413, "learning_rate": 0.003, "loss": 4.0898, "step": 7859 }, { "epoch": 0.0786, "grad_norm": 0.7197539823867891, "learning_rate": 0.003, "loss": 4.102, "step": 7860 }, { "epoch": 0.07861, "grad_norm": 0.9620546769547311, "learning_rate": 0.003, "loss": 4.1242, "step": 7861 }, { "epoch": 0.07862, "grad_norm": 1.490251553426132, "learning_rate": 0.003, "loss": 4.1592, "step": 7862 }, { "epoch": 0.07863, "grad_norm": 0.7857679852194924, "learning_rate": 0.003, "loss": 4.0792, "step": 7863 }, { "epoch": 0.07864, "grad_norm": 0.8634479975354046, "learning_rate": 0.003, "loss": 4.118, "step": 7864 }, { "epoch": 0.07865, "grad_norm": 0.7964685970693094, "learning_rate": 0.003, "loss": 4.0956, "step": 7865 }, { "epoch": 0.07866, "grad_norm": 0.6828020542596382, "learning_rate": 0.003, "loss": 4.0973, "step": 7866 }, { "epoch": 0.07867, "grad_norm": 0.6092921432164138, "learning_rate": 0.003, "loss": 4.0848, "step": 7867 }, { "epoch": 0.07868, "grad_norm": 0.5633101550595739, "learning_rate": 0.003, "loss": 4.1168, "step": 7868 }, { "epoch": 0.07869, "grad_norm": 0.6431111329973238, "learning_rate": 0.003, "loss": 4.0962, "step": 7869 }, { "epoch": 0.0787, "grad_norm": 0.901303356812522, "learning_rate": 0.003, "loss": 4.0837, "step": 7870 }, { "epoch": 0.07871, "grad_norm": 1.2191547805657925, "learning_rate": 0.003, "loss": 4.1382, "step": 7871 }, { "epoch": 0.07872, "grad_norm": 0.8262977027782229, "learning_rate": 0.003, "loss": 4.1284, "step": 7872 }, { "epoch": 0.07873, "grad_norm": 0.6724946366349702, "learning_rate": 0.003, "loss": 4.0832, "step": 7873 }, { "epoch": 0.07874, "grad_norm": 0.7244287405483971, "learning_rate": 0.003, "loss": 4.0977, "step": 7874 }, { "epoch": 0.07875, "grad_norm": 0.8227798028339546, "learning_rate": 0.003, "loss": 4.1129, "step": 7875 }, { "epoch": 0.07876, "grad_norm": 0.9349875722236806, "learning_rate": 0.003, "loss": 4.1097, "step": 7876 }, { "epoch": 0.07877, "grad_norm": 1.0794801674884056, "learning_rate": 0.003, "loss": 4.099, "step": 7877 }, { "epoch": 0.07878, "grad_norm": 0.9276580973578815, "learning_rate": 0.003, "loss": 4.1074, "step": 7878 }, { "epoch": 0.07879, "grad_norm": 0.8251333456721903, "learning_rate": 0.003, "loss": 4.1068, "step": 7879 }, { "epoch": 0.0788, "grad_norm": 0.7060382543658212, "learning_rate": 0.003, "loss": 4.0883, "step": 7880 }, { "epoch": 0.07881, "grad_norm": 0.67303988544837, "learning_rate": 0.003, "loss": 4.0959, "step": 7881 }, { "epoch": 0.07882, "grad_norm": 0.6233971310328372, "learning_rate": 0.003, "loss": 4.0906, "step": 7882 }, { "epoch": 0.07883, "grad_norm": 0.6298409951352602, "learning_rate": 0.003, "loss": 4.0701, "step": 7883 }, { "epoch": 0.07884, "grad_norm": 0.7947655306250354, "learning_rate": 0.003, "loss": 4.0973, "step": 7884 }, { "epoch": 0.07885, "grad_norm": 0.8497398170009447, "learning_rate": 0.003, "loss": 4.0875, "step": 7885 }, { "epoch": 0.07886, "grad_norm": 0.8619830961416387, "learning_rate": 0.003, "loss": 4.1118, "step": 7886 }, { "epoch": 0.07887, "grad_norm": 1.0484039664176932, "learning_rate": 0.003, "loss": 4.0927, "step": 7887 }, { "epoch": 0.07888, "grad_norm": 1.1847395590743321, "learning_rate": 0.003, "loss": 4.1116, "step": 7888 }, { "epoch": 0.07889, "grad_norm": 0.8493272011871066, "learning_rate": 0.003, "loss": 4.0759, "step": 7889 }, { "epoch": 0.0789, "grad_norm": 0.8008301667726648, "learning_rate": 0.003, "loss": 4.0909, "step": 7890 }, { "epoch": 0.07891, "grad_norm": 0.930077598807836, "learning_rate": 0.003, "loss": 4.0961, "step": 7891 }, { "epoch": 0.07892, "grad_norm": 1.0989482964593247, "learning_rate": 0.003, "loss": 4.1012, "step": 7892 }, { "epoch": 0.07893, "grad_norm": 1.1363252018029342, "learning_rate": 0.003, "loss": 4.1224, "step": 7893 }, { "epoch": 0.07894, "grad_norm": 1.0228512594068824, "learning_rate": 0.003, "loss": 4.0919, "step": 7894 }, { "epoch": 0.07895, "grad_norm": 0.9218498361406187, "learning_rate": 0.003, "loss": 4.0849, "step": 7895 }, { "epoch": 0.07896, "grad_norm": 0.9251940784692912, "learning_rate": 0.003, "loss": 4.1029, "step": 7896 }, { "epoch": 0.07897, "grad_norm": 0.931960102854229, "learning_rate": 0.003, "loss": 4.1241, "step": 7897 }, { "epoch": 0.07898, "grad_norm": 0.8464573363229492, "learning_rate": 0.003, "loss": 4.1031, "step": 7898 }, { "epoch": 0.07899, "grad_norm": 0.7028868962428811, "learning_rate": 0.003, "loss": 4.0833, "step": 7899 }, { "epoch": 0.079, "grad_norm": 0.7441885103587236, "learning_rate": 0.003, "loss": 4.0898, "step": 7900 }, { "epoch": 0.07901, "grad_norm": 0.8664872116804634, "learning_rate": 0.003, "loss": 4.1245, "step": 7901 }, { "epoch": 0.07902, "grad_norm": 0.9184607477010853, "learning_rate": 0.003, "loss": 4.0813, "step": 7902 }, { "epoch": 0.07903, "grad_norm": 0.7590321607729549, "learning_rate": 0.003, "loss": 4.0939, "step": 7903 }, { "epoch": 0.07904, "grad_norm": 0.7531494902293769, "learning_rate": 0.003, "loss": 4.0753, "step": 7904 }, { "epoch": 0.07905, "grad_norm": 0.70438167321181, "learning_rate": 0.003, "loss": 4.0653, "step": 7905 }, { "epoch": 0.07906, "grad_norm": 0.6511721703071448, "learning_rate": 0.003, "loss": 4.0797, "step": 7906 }, { "epoch": 0.07907, "grad_norm": 0.716900467492343, "learning_rate": 0.003, "loss": 4.1048, "step": 7907 }, { "epoch": 0.07908, "grad_norm": 0.7085281032769419, "learning_rate": 0.003, "loss": 4.1069, "step": 7908 }, { "epoch": 0.07909, "grad_norm": 0.8416217578074326, "learning_rate": 0.003, "loss": 4.1123, "step": 7909 }, { "epoch": 0.0791, "grad_norm": 1.1433040371010987, "learning_rate": 0.003, "loss": 4.0978, "step": 7910 }, { "epoch": 0.07911, "grad_norm": 1.1835699988724921, "learning_rate": 0.003, "loss": 4.1226, "step": 7911 }, { "epoch": 0.07912, "grad_norm": 0.9223643758933799, "learning_rate": 0.003, "loss": 4.0931, "step": 7912 }, { "epoch": 0.07913, "grad_norm": 1.089125989023986, "learning_rate": 0.003, "loss": 4.0897, "step": 7913 }, { "epoch": 0.07914, "grad_norm": 0.8390123983066416, "learning_rate": 0.003, "loss": 4.0968, "step": 7914 }, { "epoch": 0.07915, "grad_norm": 0.672914923150026, "learning_rate": 0.003, "loss": 4.0816, "step": 7915 }, { "epoch": 0.07916, "grad_norm": 0.7017447792175915, "learning_rate": 0.003, "loss": 4.108, "step": 7916 }, { "epoch": 0.07917, "grad_norm": 0.7051019103527106, "learning_rate": 0.003, "loss": 4.1085, "step": 7917 }, { "epoch": 0.07918, "grad_norm": 0.9855647114410215, "learning_rate": 0.003, "loss": 4.1062, "step": 7918 }, { "epoch": 0.07919, "grad_norm": 1.3116973662720455, "learning_rate": 0.003, "loss": 4.0776, "step": 7919 }, { "epoch": 0.0792, "grad_norm": 0.7402493790216023, "learning_rate": 0.003, "loss": 4.0949, "step": 7920 }, { "epoch": 0.07921, "grad_norm": 0.6988160486580384, "learning_rate": 0.003, "loss": 4.1027, "step": 7921 }, { "epoch": 0.07922, "grad_norm": 0.7456591280144469, "learning_rate": 0.003, "loss": 4.0831, "step": 7922 }, { "epoch": 0.07923, "grad_norm": 0.8033173795113849, "learning_rate": 0.003, "loss": 4.0847, "step": 7923 }, { "epoch": 0.07924, "grad_norm": 0.7216426094382513, "learning_rate": 0.003, "loss": 4.0803, "step": 7924 }, { "epoch": 0.07925, "grad_norm": 0.7373410578068488, "learning_rate": 0.003, "loss": 4.1117, "step": 7925 }, { "epoch": 0.07926, "grad_norm": 0.9580792267793108, "learning_rate": 0.003, "loss": 4.1202, "step": 7926 }, { "epoch": 0.07927, "grad_norm": 1.1508484267353751, "learning_rate": 0.003, "loss": 4.0717, "step": 7927 }, { "epoch": 0.07928, "grad_norm": 0.8413926446028052, "learning_rate": 0.003, "loss": 4.1065, "step": 7928 }, { "epoch": 0.07929, "grad_norm": 0.8045322762280693, "learning_rate": 0.003, "loss": 4.0904, "step": 7929 }, { "epoch": 0.0793, "grad_norm": 0.7334642856257453, "learning_rate": 0.003, "loss": 4.0704, "step": 7930 }, { "epoch": 0.07931, "grad_norm": 0.7666083653707652, "learning_rate": 0.003, "loss": 4.1106, "step": 7931 }, { "epoch": 0.07932, "grad_norm": 0.845631543538524, "learning_rate": 0.003, "loss": 4.1118, "step": 7932 }, { "epoch": 0.07933, "grad_norm": 1.1053771346780619, "learning_rate": 0.003, "loss": 4.1051, "step": 7933 }, { "epoch": 0.07934, "grad_norm": 1.0697166718848607, "learning_rate": 0.003, "loss": 4.1234, "step": 7934 }, { "epoch": 0.07935, "grad_norm": 0.780788335506962, "learning_rate": 0.003, "loss": 4.0996, "step": 7935 }, { "epoch": 0.07936, "grad_norm": 0.6719369309095249, "learning_rate": 0.003, "loss": 4.0947, "step": 7936 }, { "epoch": 0.07937, "grad_norm": 0.7129234476370259, "learning_rate": 0.003, "loss": 4.0586, "step": 7937 }, { "epoch": 0.07938, "grad_norm": 0.7579710871562575, "learning_rate": 0.003, "loss": 4.0749, "step": 7938 }, { "epoch": 0.07939, "grad_norm": 0.8414949927890002, "learning_rate": 0.003, "loss": 4.0802, "step": 7939 }, { "epoch": 0.0794, "grad_norm": 0.873475867512058, "learning_rate": 0.003, "loss": 4.0896, "step": 7940 }, { "epoch": 0.07941, "grad_norm": 0.7753297409475207, "learning_rate": 0.003, "loss": 4.0794, "step": 7941 }, { "epoch": 0.07942, "grad_norm": 0.7946685821619549, "learning_rate": 0.003, "loss": 4.073, "step": 7942 }, { "epoch": 0.07943, "grad_norm": 0.9007048298260407, "learning_rate": 0.003, "loss": 4.0814, "step": 7943 }, { "epoch": 0.07944, "grad_norm": 1.071812362091642, "learning_rate": 0.003, "loss": 4.1278, "step": 7944 }, { "epoch": 0.07945, "grad_norm": 1.0693712200612246, "learning_rate": 0.003, "loss": 4.1057, "step": 7945 }, { "epoch": 0.07946, "grad_norm": 0.8795848429684096, "learning_rate": 0.003, "loss": 4.1052, "step": 7946 }, { "epoch": 0.07947, "grad_norm": 0.800378114738204, "learning_rate": 0.003, "loss": 4.1186, "step": 7947 }, { "epoch": 0.07948, "grad_norm": 0.9321588061699683, "learning_rate": 0.003, "loss": 4.1055, "step": 7948 }, { "epoch": 0.07949, "grad_norm": 0.991434583531255, "learning_rate": 0.003, "loss": 4.1241, "step": 7949 }, { "epoch": 0.0795, "grad_norm": 0.9816512046959835, "learning_rate": 0.003, "loss": 4.0794, "step": 7950 }, { "epoch": 0.07951, "grad_norm": 0.7677971687728063, "learning_rate": 0.003, "loss": 4.0774, "step": 7951 }, { "epoch": 0.07952, "grad_norm": 0.8051868602715272, "learning_rate": 0.003, "loss": 4.1082, "step": 7952 }, { "epoch": 0.07953, "grad_norm": 0.9001609812725201, "learning_rate": 0.003, "loss": 4.0829, "step": 7953 }, { "epoch": 0.07954, "grad_norm": 1.1106187952067725, "learning_rate": 0.003, "loss": 4.1309, "step": 7954 }, { "epoch": 0.07955, "grad_norm": 0.9649928236706383, "learning_rate": 0.003, "loss": 4.1288, "step": 7955 }, { "epoch": 0.07956, "grad_norm": 1.1226837346899992, "learning_rate": 0.003, "loss": 4.1112, "step": 7956 }, { "epoch": 0.07957, "grad_norm": 1.1705169222423675, "learning_rate": 0.003, "loss": 4.1027, "step": 7957 }, { "epoch": 0.07958, "grad_norm": 0.8899304842940169, "learning_rate": 0.003, "loss": 4.1091, "step": 7958 }, { "epoch": 0.07959, "grad_norm": 0.7210543206553331, "learning_rate": 0.003, "loss": 4.1089, "step": 7959 }, { "epoch": 0.0796, "grad_norm": 0.8474860169251741, "learning_rate": 0.003, "loss": 4.1099, "step": 7960 }, { "epoch": 0.07961, "grad_norm": 0.8510145192053272, "learning_rate": 0.003, "loss": 4.0956, "step": 7961 }, { "epoch": 0.07962, "grad_norm": 0.9477244770512336, "learning_rate": 0.003, "loss": 4.1214, "step": 7962 }, { "epoch": 0.07963, "grad_norm": 0.9694960562391948, "learning_rate": 0.003, "loss": 4.0859, "step": 7963 }, { "epoch": 0.07964, "grad_norm": 1.0096222672390198, "learning_rate": 0.003, "loss": 4.1071, "step": 7964 }, { "epoch": 0.07965, "grad_norm": 0.9843752313745823, "learning_rate": 0.003, "loss": 4.1184, "step": 7965 }, { "epoch": 0.07966, "grad_norm": 0.9595021912517606, "learning_rate": 0.003, "loss": 4.095, "step": 7966 }, { "epoch": 0.07967, "grad_norm": 1.0016620302362607, "learning_rate": 0.003, "loss": 4.1206, "step": 7967 }, { "epoch": 0.07968, "grad_norm": 0.9784507337152613, "learning_rate": 0.003, "loss": 4.0908, "step": 7968 }, { "epoch": 0.07969, "grad_norm": 0.9283133075078502, "learning_rate": 0.003, "loss": 4.1292, "step": 7969 }, { "epoch": 0.0797, "grad_norm": 0.9854134391658027, "learning_rate": 0.003, "loss": 4.1122, "step": 7970 }, { "epoch": 0.07971, "grad_norm": 1.0491313465828465, "learning_rate": 0.003, "loss": 4.1012, "step": 7971 }, { "epoch": 0.07972, "grad_norm": 1.0232565756589487, "learning_rate": 0.003, "loss": 4.1043, "step": 7972 }, { "epoch": 0.07973, "grad_norm": 0.9389427379261678, "learning_rate": 0.003, "loss": 4.1196, "step": 7973 }, { "epoch": 0.07974, "grad_norm": 0.9196943064318163, "learning_rate": 0.003, "loss": 4.1003, "step": 7974 }, { "epoch": 0.07975, "grad_norm": 0.8757962873318504, "learning_rate": 0.003, "loss": 4.1336, "step": 7975 }, { "epoch": 0.07976, "grad_norm": 0.8223659364709709, "learning_rate": 0.003, "loss": 4.1146, "step": 7976 }, { "epoch": 0.07977, "grad_norm": 0.8719111818094638, "learning_rate": 0.003, "loss": 4.108, "step": 7977 }, { "epoch": 0.07978, "grad_norm": 0.8824234915401362, "learning_rate": 0.003, "loss": 4.1158, "step": 7978 }, { "epoch": 0.07979, "grad_norm": 0.9247964543100683, "learning_rate": 0.003, "loss": 4.099, "step": 7979 }, { "epoch": 0.0798, "grad_norm": 0.9397634918432941, "learning_rate": 0.003, "loss": 4.1299, "step": 7980 }, { "epoch": 0.07981, "grad_norm": 1.0543674923134152, "learning_rate": 0.003, "loss": 4.0836, "step": 7981 }, { "epoch": 0.07982, "grad_norm": 0.9626911947278476, "learning_rate": 0.003, "loss": 4.1173, "step": 7982 }, { "epoch": 0.07983, "grad_norm": 1.0080240905133067, "learning_rate": 0.003, "loss": 4.1443, "step": 7983 }, { "epoch": 0.07984, "grad_norm": 1.2190721528512718, "learning_rate": 0.003, "loss": 4.1319, "step": 7984 }, { "epoch": 0.07985, "grad_norm": 0.8286713865087831, "learning_rate": 0.003, "loss": 4.0826, "step": 7985 }, { "epoch": 0.07986, "grad_norm": 0.8677596520468425, "learning_rate": 0.003, "loss": 4.1251, "step": 7986 }, { "epoch": 0.07987, "grad_norm": 0.839953559356892, "learning_rate": 0.003, "loss": 4.0953, "step": 7987 }, { "epoch": 0.07988, "grad_norm": 0.8616747266554645, "learning_rate": 0.003, "loss": 4.1127, "step": 7988 }, { "epoch": 0.07989, "grad_norm": 0.8768670980849567, "learning_rate": 0.003, "loss": 4.0857, "step": 7989 }, { "epoch": 0.0799, "grad_norm": 1.060320785667829, "learning_rate": 0.003, "loss": 4.1073, "step": 7990 }, { "epoch": 0.07991, "grad_norm": 1.2397246116873737, "learning_rate": 0.003, "loss": 4.1086, "step": 7991 }, { "epoch": 0.07992, "grad_norm": 0.8564943829168591, "learning_rate": 0.003, "loss": 4.1067, "step": 7992 }, { "epoch": 0.07993, "grad_norm": 0.6329653057644131, "learning_rate": 0.003, "loss": 4.0928, "step": 7993 }, { "epoch": 0.07994, "grad_norm": 0.5495879883814838, "learning_rate": 0.003, "loss": 4.1109, "step": 7994 }, { "epoch": 0.07995, "grad_norm": 0.6418923191491803, "learning_rate": 0.003, "loss": 4.1091, "step": 7995 }, { "epoch": 0.07996, "grad_norm": 0.6360153656004878, "learning_rate": 0.003, "loss": 4.1003, "step": 7996 }, { "epoch": 0.07997, "grad_norm": 0.6967070433022678, "learning_rate": 0.003, "loss": 4.0979, "step": 7997 }, { "epoch": 0.07998, "grad_norm": 0.8401425534763325, "learning_rate": 0.003, "loss": 4.1299, "step": 7998 }, { "epoch": 0.07999, "grad_norm": 0.9140910976472046, "learning_rate": 0.003, "loss": 4.0999, "step": 7999 }, { "epoch": 0.08, "grad_norm": 0.8341075710754534, "learning_rate": 0.003, "loss": 4.1081, "step": 8000 }, { "epoch": 0.08001, "grad_norm": 0.6762636347053551, "learning_rate": 0.003, "loss": 4.0902, "step": 8001 }, { "epoch": 0.08002, "grad_norm": 0.7219182957365917, "learning_rate": 0.003, "loss": 4.0817, "step": 8002 }, { "epoch": 0.08003, "grad_norm": 0.7307059911401975, "learning_rate": 0.003, "loss": 4.1094, "step": 8003 }, { "epoch": 0.08004, "grad_norm": 0.7690134766684371, "learning_rate": 0.003, "loss": 4.0892, "step": 8004 }, { "epoch": 0.08005, "grad_norm": 0.7032812824393433, "learning_rate": 0.003, "loss": 4.1121, "step": 8005 }, { "epoch": 0.08006, "grad_norm": 0.8083021905718699, "learning_rate": 0.003, "loss": 4.0911, "step": 8006 }, { "epoch": 0.08007, "grad_norm": 1.0155775443805966, "learning_rate": 0.003, "loss": 4.0881, "step": 8007 }, { "epoch": 0.08008, "grad_norm": 1.2028469175430232, "learning_rate": 0.003, "loss": 4.0796, "step": 8008 }, { "epoch": 0.08009, "grad_norm": 0.7132237916366507, "learning_rate": 0.003, "loss": 4.0904, "step": 8009 }, { "epoch": 0.0801, "grad_norm": 0.5404697854674432, "learning_rate": 0.003, "loss": 4.0813, "step": 8010 }, { "epoch": 0.08011, "grad_norm": 0.5947072885434764, "learning_rate": 0.003, "loss": 4.0933, "step": 8011 }, { "epoch": 0.08012, "grad_norm": 0.7363521666233862, "learning_rate": 0.003, "loss": 4.094, "step": 8012 }, { "epoch": 0.08013, "grad_norm": 1.0063376322547504, "learning_rate": 0.003, "loss": 4.1003, "step": 8013 }, { "epoch": 0.08014, "grad_norm": 1.1160436669300626, "learning_rate": 0.003, "loss": 4.09, "step": 8014 }, { "epoch": 0.08015, "grad_norm": 0.7989320079754313, "learning_rate": 0.003, "loss": 4.1159, "step": 8015 }, { "epoch": 0.08016, "grad_norm": 0.7682760666118215, "learning_rate": 0.003, "loss": 4.0721, "step": 8016 }, { "epoch": 0.08017, "grad_norm": 0.7989671209318877, "learning_rate": 0.003, "loss": 4.0802, "step": 8017 }, { "epoch": 0.08018, "grad_norm": 0.8900732101990633, "learning_rate": 0.003, "loss": 4.0946, "step": 8018 }, { "epoch": 0.08019, "grad_norm": 0.7809140627865704, "learning_rate": 0.003, "loss": 4.1184, "step": 8019 }, { "epoch": 0.0802, "grad_norm": 0.8370791311704688, "learning_rate": 0.003, "loss": 4.0851, "step": 8020 }, { "epoch": 0.08021, "grad_norm": 0.7244688057921231, "learning_rate": 0.003, "loss": 4.0569, "step": 8021 }, { "epoch": 0.08022, "grad_norm": 0.7032261987394863, "learning_rate": 0.003, "loss": 4.0991, "step": 8022 }, { "epoch": 0.08023, "grad_norm": 0.7746372857782606, "learning_rate": 0.003, "loss": 4.0878, "step": 8023 }, { "epoch": 0.08024, "grad_norm": 0.980871731492932, "learning_rate": 0.003, "loss": 4.1094, "step": 8024 }, { "epoch": 0.08025, "grad_norm": 1.1967229295248476, "learning_rate": 0.003, "loss": 4.0813, "step": 8025 }, { "epoch": 0.08026, "grad_norm": 0.8874537036909782, "learning_rate": 0.003, "loss": 4.0908, "step": 8026 }, { "epoch": 0.08027, "grad_norm": 0.7886044707135144, "learning_rate": 0.003, "loss": 4.0741, "step": 8027 }, { "epoch": 0.08028, "grad_norm": 0.7195647660407796, "learning_rate": 0.003, "loss": 4.0861, "step": 8028 }, { "epoch": 0.08029, "grad_norm": 0.8677533765236772, "learning_rate": 0.003, "loss": 4.1126, "step": 8029 }, { "epoch": 0.0803, "grad_norm": 1.006614001349766, "learning_rate": 0.003, "loss": 4.0846, "step": 8030 }, { "epoch": 0.08031, "grad_norm": 1.1001052865444716, "learning_rate": 0.003, "loss": 4.1064, "step": 8031 }, { "epoch": 0.08032, "grad_norm": 0.9468984767405869, "learning_rate": 0.003, "loss": 4.0902, "step": 8032 }, { "epoch": 0.08033, "grad_norm": 0.7392931914642568, "learning_rate": 0.003, "loss": 4.1144, "step": 8033 }, { "epoch": 0.08034, "grad_norm": 0.8147657234401885, "learning_rate": 0.003, "loss": 4.1125, "step": 8034 }, { "epoch": 0.08035, "grad_norm": 0.9473798784474897, "learning_rate": 0.003, "loss": 4.1259, "step": 8035 }, { "epoch": 0.08036, "grad_norm": 0.9663624382781947, "learning_rate": 0.003, "loss": 4.1423, "step": 8036 }, { "epoch": 0.08037, "grad_norm": 0.9851396854632779, "learning_rate": 0.003, "loss": 4.1002, "step": 8037 }, { "epoch": 0.08038, "grad_norm": 0.9226417242283115, "learning_rate": 0.003, "loss": 4.0977, "step": 8038 }, { "epoch": 0.08039, "grad_norm": 0.9417189662091079, "learning_rate": 0.003, "loss": 4.1065, "step": 8039 }, { "epoch": 0.0804, "grad_norm": 1.1112977827509558, "learning_rate": 0.003, "loss": 4.1227, "step": 8040 }, { "epoch": 0.08041, "grad_norm": 0.7739549217316514, "learning_rate": 0.003, "loss": 4.1207, "step": 8041 }, { "epoch": 0.08042, "grad_norm": 0.6673108385892449, "learning_rate": 0.003, "loss": 4.1019, "step": 8042 }, { "epoch": 0.08043, "grad_norm": 0.7412544947569171, "learning_rate": 0.003, "loss": 4.0956, "step": 8043 }, { "epoch": 0.08044, "grad_norm": 0.7934062901474336, "learning_rate": 0.003, "loss": 4.0792, "step": 8044 }, { "epoch": 0.08045, "grad_norm": 0.939659696654444, "learning_rate": 0.003, "loss": 4.0885, "step": 8045 }, { "epoch": 0.08046, "grad_norm": 1.0779216512774807, "learning_rate": 0.003, "loss": 4.1042, "step": 8046 }, { "epoch": 0.08047, "grad_norm": 1.0583855488249307, "learning_rate": 0.003, "loss": 4.101, "step": 8047 }, { "epoch": 0.08048, "grad_norm": 1.010247891168986, "learning_rate": 0.003, "loss": 4.111, "step": 8048 }, { "epoch": 0.08049, "grad_norm": 0.8559934548803988, "learning_rate": 0.003, "loss": 4.091, "step": 8049 }, { "epoch": 0.0805, "grad_norm": 0.785962682018867, "learning_rate": 0.003, "loss": 4.1154, "step": 8050 }, { "epoch": 0.08051, "grad_norm": 0.8243401452192997, "learning_rate": 0.003, "loss": 4.0936, "step": 8051 }, { "epoch": 0.08052, "grad_norm": 0.8118236172127069, "learning_rate": 0.003, "loss": 4.0744, "step": 8052 }, { "epoch": 0.08053, "grad_norm": 0.7919496704600435, "learning_rate": 0.003, "loss": 4.1142, "step": 8053 }, { "epoch": 0.08054, "grad_norm": 0.7931762362063768, "learning_rate": 0.003, "loss": 4.0852, "step": 8054 }, { "epoch": 0.08055, "grad_norm": 0.8667155845252095, "learning_rate": 0.003, "loss": 4.0678, "step": 8055 }, { "epoch": 0.08056, "grad_norm": 1.0124039809076317, "learning_rate": 0.003, "loss": 4.1054, "step": 8056 }, { "epoch": 0.08057, "grad_norm": 1.1336788875699602, "learning_rate": 0.003, "loss": 4.1141, "step": 8057 }, { "epoch": 0.08058, "grad_norm": 0.8138250876786765, "learning_rate": 0.003, "loss": 4.1001, "step": 8058 }, { "epoch": 0.08059, "grad_norm": 0.7527191665858058, "learning_rate": 0.003, "loss": 4.0797, "step": 8059 }, { "epoch": 0.0806, "grad_norm": 0.8398808621055186, "learning_rate": 0.003, "loss": 4.0922, "step": 8060 }, { "epoch": 0.08061, "grad_norm": 0.825612767978505, "learning_rate": 0.003, "loss": 4.0782, "step": 8061 }, { "epoch": 0.08062, "grad_norm": 0.9747793071523012, "learning_rate": 0.003, "loss": 4.11, "step": 8062 }, { "epoch": 0.08063, "grad_norm": 1.1323711262853373, "learning_rate": 0.003, "loss": 4.1409, "step": 8063 }, { "epoch": 0.08064, "grad_norm": 0.9414405685374689, "learning_rate": 0.003, "loss": 4.0977, "step": 8064 }, { "epoch": 0.08065, "grad_norm": 0.877182385950829, "learning_rate": 0.003, "loss": 4.1089, "step": 8065 }, { "epoch": 0.08066, "grad_norm": 0.9823013557480436, "learning_rate": 0.003, "loss": 4.1285, "step": 8066 }, { "epoch": 0.08067, "grad_norm": 1.0076994124987724, "learning_rate": 0.003, "loss": 4.1172, "step": 8067 }, { "epoch": 0.08068, "grad_norm": 0.9543931097825955, "learning_rate": 0.003, "loss": 4.1151, "step": 8068 }, { "epoch": 0.08069, "grad_norm": 0.860639718383474, "learning_rate": 0.003, "loss": 4.1147, "step": 8069 }, { "epoch": 0.0807, "grad_norm": 0.8988959057627193, "learning_rate": 0.003, "loss": 4.1014, "step": 8070 }, { "epoch": 0.08071, "grad_norm": 0.9534116382487039, "learning_rate": 0.003, "loss": 4.1059, "step": 8071 }, { "epoch": 0.08072, "grad_norm": 0.8660070014644025, "learning_rate": 0.003, "loss": 4.0918, "step": 8072 }, { "epoch": 0.08073, "grad_norm": 0.8206611525482264, "learning_rate": 0.003, "loss": 4.0941, "step": 8073 }, { "epoch": 0.08074, "grad_norm": 0.965813130304323, "learning_rate": 0.003, "loss": 4.084, "step": 8074 }, { "epoch": 0.08075, "grad_norm": 1.1442526973314016, "learning_rate": 0.003, "loss": 4.1309, "step": 8075 }, { "epoch": 0.08076, "grad_norm": 0.7971529185891382, "learning_rate": 0.003, "loss": 4.0792, "step": 8076 }, { "epoch": 0.08077, "grad_norm": 0.6441126050923031, "learning_rate": 0.003, "loss": 4.1244, "step": 8077 }, { "epoch": 0.08078, "grad_norm": 0.711624279954515, "learning_rate": 0.003, "loss": 4.111, "step": 8078 }, { "epoch": 0.08079, "grad_norm": 0.8468946223392866, "learning_rate": 0.003, "loss": 4.1025, "step": 8079 }, { "epoch": 0.0808, "grad_norm": 0.955200727057764, "learning_rate": 0.003, "loss": 4.0951, "step": 8080 }, { "epoch": 0.08081, "grad_norm": 1.153055759299516, "learning_rate": 0.003, "loss": 4.1146, "step": 8081 }, { "epoch": 0.08082, "grad_norm": 0.7784581084262547, "learning_rate": 0.003, "loss": 4.0699, "step": 8082 }, { "epoch": 0.08083, "grad_norm": 0.7994938331488759, "learning_rate": 0.003, "loss": 4.0935, "step": 8083 }, { "epoch": 0.08084, "grad_norm": 0.7985218153568882, "learning_rate": 0.003, "loss": 4.0949, "step": 8084 }, { "epoch": 0.08085, "grad_norm": 0.7650441422106858, "learning_rate": 0.003, "loss": 4.1222, "step": 8085 }, { "epoch": 0.08086, "grad_norm": 0.9072526574743619, "learning_rate": 0.003, "loss": 4.0752, "step": 8086 }, { "epoch": 0.08087, "grad_norm": 1.0019552972566608, "learning_rate": 0.003, "loss": 4.0921, "step": 8087 }, { "epoch": 0.08088, "grad_norm": 1.0698186825083669, "learning_rate": 0.003, "loss": 4.1086, "step": 8088 }, { "epoch": 0.08089, "grad_norm": 0.8487971751746695, "learning_rate": 0.003, "loss": 4.0703, "step": 8089 }, { "epoch": 0.0809, "grad_norm": 0.7375163925623175, "learning_rate": 0.003, "loss": 4.096, "step": 8090 }, { "epoch": 0.08091, "grad_norm": 0.8077965976484879, "learning_rate": 0.003, "loss": 4.1169, "step": 8091 }, { "epoch": 0.08092, "grad_norm": 0.8806655777433894, "learning_rate": 0.003, "loss": 4.118, "step": 8092 }, { "epoch": 0.08093, "grad_norm": 0.8846095215059361, "learning_rate": 0.003, "loss": 4.0834, "step": 8093 }, { "epoch": 0.08094, "grad_norm": 1.0263143758049644, "learning_rate": 0.003, "loss": 4.0865, "step": 8094 }, { "epoch": 0.08095, "grad_norm": 0.9975182671338138, "learning_rate": 0.003, "loss": 4.0806, "step": 8095 }, { "epoch": 0.08096, "grad_norm": 0.9243866628279602, "learning_rate": 0.003, "loss": 4.1112, "step": 8096 }, { "epoch": 0.08097, "grad_norm": 0.764690705974969, "learning_rate": 0.003, "loss": 4.0992, "step": 8097 }, { "epoch": 0.08098, "grad_norm": 0.8123826720903898, "learning_rate": 0.003, "loss": 4.1283, "step": 8098 }, { "epoch": 0.08099, "grad_norm": 0.8307382789621239, "learning_rate": 0.003, "loss": 4.1016, "step": 8099 }, { "epoch": 0.081, "grad_norm": 0.683295591217391, "learning_rate": 0.003, "loss": 4.1022, "step": 8100 }, { "epoch": 0.08101, "grad_norm": 0.762072793349242, "learning_rate": 0.003, "loss": 4.1253, "step": 8101 }, { "epoch": 0.08102, "grad_norm": 0.8417363332108121, "learning_rate": 0.003, "loss": 4.1029, "step": 8102 }, { "epoch": 0.08103, "grad_norm": 1.0026749696403685, "learning_rate": 0.003, "loss": 4.1107, "step": 8103 }, { "epoch": 0.08104, "grad_norm": 1.3055013665970854, "learning_rate": 0.003, "loss": 4.1224, "step": 8104 }, { "epoch": 0.08105, "grad_norm": 0.7284350470055041, "learning_rate": 0.003, "loss": 4.1211, "step": 8105 }, { "epoch": 0.08106, "grad_norm": 0.7042488841506047, "learning_rate": 0.003, "loss": 4.1078, "step": 8106 }, { "epoch": 0.08107, "grad_norm": 0.6662191545969933, "learning_rate": 0.003, "loss": 4.1052, "step": 8107 }, { "epoch": 0.08108, "grad_norm": 0.7428305715535388, "learning_rate": 0.003, "loss": 4.0536, "step": 8108 }, { "epoch": 0.08109, "grad_norm": 0.7934277909736256, "learning_rate": 0.003, "loss": 4.0994, "step": 8109 }, { "epoch": 0.0811, "grad_norm": 0.8818863948526912, "learning_rate": 0.003, "loss": 4.1196, "step": 8110 }, { "epoch": 0.08111, "grad_norm": 1.0271099724103216, "learning_rate": 0.003, "loss": 4.115, "step": 8111 }, { "epoch": 0.08112, "grad_norm": 1.2432013981414785, "learning_rate": 0.003, "loss": 4.0916, "step": 8112 }, { "epoch": 0.08113, "grad_norm": 0.8995583170658074, "learning_rate": 0.003, "loss": 4.101, "step": 8113 }, { "epoch": 0.08114, "grad_norm": 1.0954559019247962, "learning_rate": 0.003, "loss": 4.1321, "step": 8114 }, { "epoch": 0.08115, "grad_norm": 1.261265478089719, "learning_rate": 0.003, "loss": 4.1249, "step": 8115 }, { "epoch": 0.08116, "grad_norm": 0.8815832900165901, "learning_rate": 0.003, "loss": 4.1289, "step": 8116 }, { "epoch": 0.08117, "grad_norm": 0.9087111097091439, "learning_rate": 0.003, "loss": 4.1278, "step": 8117 }, { "epoch": 0.08118, "grad_norm": 1.010760403401548, "learning_rate": 0.003, "loss": 4.1141, "step": 8118 }, { "epoch": 0.08119, "grad_norm": 1.056578294217066, "learning_rate": 0.003, "loss": 4.1093, "step": 8119 }, { "epoch": 0.0812, "grad_norm": 0.9182887201534551, "learning_rate": 0.003, "loss": 4.0887, "step": 8120 }, { "epoch": 0.08121, "grad_norm": 0.9041701432881559, "learning_rate": 0.003, "loss": 4.1182, "step": 8121 }, { "epoch": 0.08122, "grad_norm": 0.8561224947474484, "learning_rate": 0.003, "loss": 4.1108, "step": 8122 }, { "epoch": 0.08123, "grad_norm": 0.7614632039050264, "learning_rate": 0.003, "loss": 4.1016, "step": 8123 }, { "epoch": 0.08124, "grad_norm": 0.7907531416628623, "learning_rate": 0.003, "loss": 4.1148, "step": 8124 }, { "epoch": 0.08125, "grad_norm": 0.7300964775523906, "learning_rate": 0.003, "loss": 4.1313, "step": 8125 }, { "epoch": 0.08126, "grad_norm": 0.827569261113533, "learning_rate": 0.003, "loss": 4.0894, "step": 8126 }, { "epoch": 0.08127, "grad_norm": 1.0339006016723764, "learning_rate": 0.003, "loss": 4.0945, "step": 8127 }, { "epoch": 0.08128, "grad_norm": 1.2837248118220423, "learning_rate": 0.003, "loss": 4.1243, "step": 8128 }, { "epoch": 0.08129, "grad_norm": 0.831463644855417, "learning_rate": 0.003, "loss": 4.0863, "step": 8129 }, { "epoch": 0.0813, "grad_norm": 0.7313691024451621, "learning_rate": 0.003, "loss": 4.0842, "step": 8130 }, { "epoch": 0.08131, "grad_norm": 0.7449363031040399, "learning_rate": 0.003, "loss": 4.1078, "step": 8131 }, { "epoch": 0.08132, "grad_norm": 0.8923893884805761, "learning_rate": 0.003, "loss": 4.1007, "step": 8132 }, { "epoch": 0.08133, "grad_norm": 1.00858302579857, "learning_rate": 0.003, "loss": 4.0976, "step": 8133 }, { "epoch": 0.08134, "grad_norm": 1.0839591261263513, "learning_rate": 0.003, "loss": 4.0935, "step": 8134 }, { "epoch": 0.08135, "grad_norm": 0.9551176992279036, "learning_rate": 0.003, "loss": 4.0953, "step": 8135 }, { "epoch": 0.08136, "grad_norm": 1.1427717477515151, "learning_rate": 0.003, "loss": 4.1348, "step": 8136 }, { "epoch": 0.08137, "grad_norm": 1.1608247537070675, "learning_rate": 0.003, "loss": 4.0727, "step": 8137 }, { "epoch": 0.08138, "grad_norm": 0.8036743748727992, "learning_rate": 0.003, "loss": 4.0979, "step": 8138 }, { "epoch": 0.08139, "grad_norm": 0.7123613175658509, "learning_rate": 0.003, "loss": 4.1044, "step": 8139 }, { "epoch": 0.0814, "grad_norm": 0.7693281819705117, "learning_rate": 0.003, "loss": 4.1192, "step": 8140 }, { "epoch": 0.08141, "grad_norm": 0.6727495096734566, "learning_rate": 0.003, "loss": 4.1112, "step": 8141 }, { "epoch": 0.08142, "grad_norm": 0.6845081706658788, "learning_rate": 0.003, "loss": 4.1214, "step": 8142 }, { "epoch": 0.08143, "grad_norm": 0.7460442553490216, "learning_rate": 0.003, "loss": 4.1158, "step": 8143 }, { "epoch": 0.08144, "grad_norm": 0.8245361382413854, "learning_rate": 0.003, "loss": 4.0854, "step": 8144 }, { "epoch": 0.08145, "grad_norm": 0.8230934857490788, "learning_rate": 0.003, "loss": 4.0918, "step": 8145 }, { "epoch": 0.08146, "grad_norm": 0.7712994219712946, "learning_rate": 0.003, "loss": 4.1229, "step": 8146 }, { "epoch": 0.08147, "grad_norm": 0.7460865071832761, "learning_rate": 0.003, "loss": 4.1233, "step": 8147 }, { "epoch": 0.08148, "grad_norm": 0.7735141713283167, "learning_rate": 0.003, "loss": 4.0783, "step": 8148 }, { "epoch": 0.08149, "grad_norm": 0.7896814609713348, "learning_rate": 0.003, "loss": 4.088, "step": 8149 }, { "epoch": 0.0815, "grad_norm": 0.7747886996009599, "learning_rate": 0.003, "loss": 4.0794, "step": 8150 }, { "epoch": 0.08151, "grad_norm": 0.8866325001517937, "learning_rate": 0.003, "loss": 4.109, "step": 8151 }, { "epoch": 0.08152, "grad_norm": 0.9586498492742512, "learning_rate": 0.003, "loss": 4.1149, "step": 8152 }, { "epoch": 0.08153, "grad_norm": 0.8759445141936913, "learning_rate": 0.003, "loss": 4.0909, "step": 8153 }, { "epoch": 0.08154, "grad_norm": 0.8213012411475468, "learning_rate": 0.003, "loss": 4.104, "step": 8154 }, { "epoch": 0.08155, "grad_norm": 0.7343561260089444, "learning_rate": 0.003, "loss": 4.1183, "step": 8155 }, { "epoch": 0.08156, "grad_norm": 0.7055042559161329, "learning_rate": 0.003, "loss": 4.1006, "step": 8156 }, { "epoch": 0.08157, "grad_norm": 0.6906999012739224, "learning_rate": 0.003, "loss": 4.0476, "step": 8157 }, { "epoch": 0.08158, "grad_norm": 0.6797037737186805, "learning_rate": 0.003, "loss": 4.1267, "step": 8158 }, { "epoch": 0.08159, "grad_norm": 0.6440815279956414, "learning_rate": 0.003, "loss": 4.1222, "step": 8159 }, { "epoch": 0.0816, "grad_norm": 0.6557581564589431, "learning_rate": 0.003, "loss": 4.0944, "step": 8160 }, { "epoch": 0.08161, "grad_norm": 0.6961649850170736, "learning_rate": 0.003, "loss": 4.1012, "step": 8161 }, { "epoch": 0.08162, "grad_norm": 0.7152753880484393, "learning_rate": 0.003, "loss": 4.1055, "step": 8162 }, { "epoch": 0.08163, "grad_norm": 0.7444333284957504, "learning_rate": 0.003, "loss": 4.0603, "step": 8163 }, { "epoch": 0.08164, "grad_norm": 0.8297049679085736, "learning_rate": 0.003, "loss": 4.0913, "step": 8164 }, { "epoch": 0.08165, "grad_norm": 0.9254543973261257, "learning_rate": 0.003, "loss": 4.0958, "step": 8165 }, { "epoch": 0.08166, "grad_norm": 1.135495751545436, "learning_rate": 0.003, "loss": 4.1029, "step": 8166 }, { "epoch": 0.08167, "grad_norm": 1.003264644227776, "learning_rate": 0.003, "loss": 4.1002, "step": 8167 }, { "epoch": 0.08168, "grad_norm": 1.3151569568484487, "learning_rate": 0.003, "loss": 4.0823, "step": 8168 }, { "epoch": 0.08169, "grad_norm": 0.7417217462858907, "learning_rate": 0.003, "loss": 4.1065, "step": 8169 }, { "epoch": 0.0817, "grad_norm": 0.6967246388224767, "learning_rate": 0.003, "loss": 4.1001, "step": 8170 }, { "epoch": 0.08171, "grad_norm": 0.8159574604205326, "learning_rate": 0.003, "loss": 4.0882, "step": 8171 }, { "epoch": 0.08172, "grad_norm": 0.8338929792539445, "learning_rate": 0.003, "loss": 4.1141, "step": 8172 }, { "epoch": 0.08173, "grad_norm": 0.98094890794689, "learning_rate": 0.003, "loss": 4.141, "step": 8173 }, { "epoch": 0.08174, "grad_norm": 1.0004268057358918, "learning_rate": 0.003, "loss": 4.1362, "step": 8174 }, { "epoch": 0.08175, "grad_norm": 1.1607043587315327, "learning_rate": 0.003, "loss": 4.105, "step": 8175 }, { "epoch": 0.08176, "grad_norm": 0.9605737836969909, "learning_rate": 0.003, "loss": 4.0768, "step": 8176 }, { "epoch": 0.08177, "grad_norm": 0.8834558793913779, "learning_rate": 0.003, "loss": 4.0903, "step": 8177 }, { "epoch": 0.08178, "grad_norm": 0.9463401693916579, "learning_rate": 0.003, "loss": 4.1009, "step": 8178 }, { "epoch": 0.08179, "grad_norm": 0.9414403348160015, "learning_rate": 0.003, "loss": 4.1263, "step": 8179 }, { "epoch": 0.0818, "grad_norm": 0.9984246787560999, "learning_rate": 0.003, "loss": 4.1026, "step": 8180 }, { "epoch": 0.08181, "grad_norm": 1.1775258869822263, "learning_rate": 0.003, "loss": 4.0968, "step": 8181 }, { "epoch": 0.08182, "grad_norm": 0.8435168559607914, "learning_rate": 0.003, "loss": 4.1339, "step": 8182 }, { "epoch": 0.08183, "grad_norm": 0.8045360298546768, "learning_rate": 0.003, "loss": 4.0901, "step": 8183 }, { "epoch": 0.08184, "grad_norm": 0.7940871155944753, "learning_rate": 0.003, "loss": 4.0869, "step": 8184 }, { "epoch": 0.08185, "grad_norm": 0.7578411281597589, "learning_rate": 0.003, "loss": 4.1063, "step": 8185 }, { "epoch": 0.08186, "grad_norm": 0.7267884989595176, "learning_rate": 0.003, "loss": 4.1003, "step": 8186 }, { "epoch": 0.08187, "grad_norm": 0.9028879506424374, "learning_rate": 0.003, "loss": 4.0746, "step": 8187 }, { "epoch": 0.08188, "grad_norm": 1.0836114502579879, "learning_rate": 0.003, "loss": 4.1278, "step": 8188 }, { "epoch": 0.08189, "grad_norm": 1.2100347103121765, "learning_rate": 0.003, "loss": 4.1286, "step": 8189 }, { "epoch": 0.0819, "grad_norm": 0.7309510071375395, "learning_rate": 0.003, "loss": 4.0928, "step": 8190 }, { "epoch": 0.08191, "grad_norm": 0.6311549475723786, "learning_rate": 0.003, "loss": 4.0938, "step": 8191 }, { "epoch": 0.08192, "grad_norm": 0.7435668383560015, "learning_rate": 0.003, "loss": 4.0574, "step": 8192 }, { "epoch": 0.08193, "grad_norm": 0.8580955152495726, "learning_rate": 0.003, "loss": 4.1014, "step": 8193 }, { "epoch": 0.08194, "grad_norm": 0.8737720108493068, "learning_rate": 0.003, "loss": 4.1122, "step": 8194 }, { "epoch": 0.08195, "grad_norm": 0.7368558728662361, "learning_rate": 0.003, "loss": 4.0903, "step": 8195 }, { "epoch": 0.08196, "grad_norm": 0.648364175407553, "learning_rate": 0.003, "loss": 4.1175, "step": 8196 }, { "epoch": 0.08197, "grad_norm": 0.7511314273266356, "learning_rate": 0.003, "loss": 4.1014, "step": 8197 }, { "epoch": 0.08198, "grad_norm": 0.9036375613694632, "learning_rate": 0.003, "loss": 4.0962, "step": 8198 }, { "epoch": 0.08199, "grad_norm": 1.0500737128901654, "learning_rate": 0.003, "loss": 4.112, "step": 8199 }, { "epoch": 0.082, "grad_norm": 1.1226888117313103, "learning_rate": 0.003, "loss": 4.0843, "step": 8200 }, { "epoch": 0.08201, "grad_norm": 0.9559312403994046, "learning_rate": 0.003, "loss": 4.0925, "step": 8201 }, { "epoch": 0.08202, "grad_norm": 0.9864224976849971, "learning_rate": 0.003, "loss": 4.0857, "step": 8202 }, { "epoch": 0.08203, "grad_norm": 1.0665254798248887, "learning_rate": 0.003, "loss": 4.1128, "step": 8203 }, { "epoch": 0.08204, "grad_norm": 0.9332708500174354, "learning_rate": 0.003, "loss": 4.0993, "step": 8204 }, { "epoch": 0.08205, "grad_norm": 0.9114402954224472, "learning_rate": 0.003, "loss": 4.095, "step": 8205 }, { "epoch": 0.08206, "grad_norm": 0.9578612535805147, "learning_rate": 0.003, "loss": 4.1306, "step": 8206 }, { "epoch": 0.08207, "grad_norm": 0.8491904917863199, "learning_rate": 0.003, "loss": 4.1012, "step": 8207 }, { "epoch": 0.08208, "grad_norm": 0.6965883444740882, "learning_rate": 0.003, "loss": 4.0969, "step": 8208 }, { "epoch": 0.08209, "grad_norm": 0.7953053984407373, "learning_rate": 0.003, "loss": 4.1146, "step": 8209 }, { "epoch": 0.0821, "grad_norm": 0.7485874491455442, "learning_rate": 0.003, "loss": 4.0998, "step": 8210 }, { "epoch": 0.08211, "grad_norm": 0.6959203077939378, "learning_rate": 0.003, "loss": 4.0982, "step": 8211 }, { "epoch": 0.08212, "grad_norm": 0.8449995767744634, "learning_rate": 0.003, "loss": 4.0973, "step": 8212 }, { "epoch": 0.08213, "grad_norm": 1.1015900099708935, "learning_rate": 0.003, "loss": 4.0882, "step": 8213 }, { "epoch": 0.08214, "grad_norm": 1.0315791177365499, "learning_rate": 0.003, "loss": 4.1219, "step": 8214 }, { "epoch": 0.08215, "grad_norm": 0.9380557040904997, "learning_rate": 0.003, "loss": 4.1229, "step": 8215 }, { "epoch": 0.08216, "grad_norm": 0.8923877134844612, "learning_rate": 0.003, "loss": 4.1007, "step": 8216 }, { "epoch": 0.08217, "grad_norm": 0.8575588533424483, "learning_rate": 0.003, "loss": 4.1298, "step": 8217 }, { "epoch": 0.08218, "grad_norm": 0.8232596706369313, "learning_rate": 0.003, "loss": 4.0913, "step": 8218 }, { "epoch": 0.08219, "grad_norm": 0.9081979548545349, "learning_rate": 0.003, "loss": 4.091, "step": 8219 }, { "epoch": 0.0822, "grad_norm": 0.8155798907038242, "learning_rate": 0.003, "loss": 4.0994, "step": 8220 }, { "epoch": 0.08221, "grad_norm": 0.8942358339851308, "learning_rate": 0.003, "loss": 4.1209, "step": 8221 }, { "epoch": 0.08222, "grad_norm": 1.1787055400411643, "learning_rate": 0.003, "loss": 4.1132, "step": 8222 }, { "epoch": 0.08223, "grad_norm": 0.9509702723098798, "learning_rate": 0.003, "loss": 4.1173, "step": 8223 }, { "epoch": 0.08224, "grad_norm": 0.888798577292018, "learning_rate": 0.003, "loss": 4.1138, "step": 8224 }, { "epoch": 0.08225, "grad_norm": 0.8859713724373705, "learning_rate": 0.003, "loss": 4.1253, "step": 8225 }, { "epoch": 0.08226, "grad_norm": 0.9453256114459335, "learning_rate": 0.003, "loss": 4.0908, "step": 8226 }, { "epoch": 0.08227, "grad_norm": 1.0285023285363262, "learning_rate": 0.003, "loss": 4.1358, "step": 8227 }, { "epoch": 0.08228, "grad_norm": 1.0441982441165125, "learning_rate": 0.003, "loss": 4.1232, "step": 8228 }, { "epoch": 0.08229, "grad_norm": 0.9285745096914537, "learning_rate": 0.003, "loss": 4.0867, "step": 8229 }, { "epoch": 0.0823, "grad_norm": 0.8612854507864894, "learning_rate": 0.003, "loss": 4.1279, "step": 8230 }, { "epoch": 0.08231, "grad_norm": 0.9539626109153166, "learning_rate": 0.003, "loss": 4.083, "step": 8231 }, { "epoch": 0.08232, "grad_norm": 1.1127495391486235, "learning_rate": 0.003, "loss": 4.0966, "step": 8232 }, { "epoch": 0.08233, "grad_norm": 0.9309241874844985, "learning_rate": 0.003, "loss": 4.1035, "step": 8233 }, { "epoch": 0.08234, "grad_norm": 0.8586480291175957, "learning_rate": 0.003, "loss": 4.1162, "step": 8234 }, { "epoch": 0.08235, "grad_norm": 0.8943116440399151, "learning_rate": 0.003, "loss": 4.136, "step": 8235 }, { "epoch": 0.08236, "grad_norm": 0.8765191739384368, "learning_rate": 0.003, "loss": 4.0921, "step": 8236 }, { "epoch": 0.08237, "grad_norm": 0.8771079726635557, "learning_rate": 0.003, "loss": 4.1005, "step": 8237 }, { "epoch": 0.08238, "grad_norm": 0.983672922309625, "learning_rate": 0.003, "loss": 4.1149, "step": 8238 }, { "epoch": 0.08239, "grad_norm": 1.0592369601456073, "learning_rate": 0.003, "loss": 4.0891, "step": 8239 }, { "epoch": 0.0824, "grad_norm": 1.0147386504894589, "learning_rate": 0.003, "loss": 4.1256, "step": 8240 }, { "epoch": 0.08241, "grad_norm": 0.8937176926338467, "learning_rate": 0.003, "loss": 4.1036, "step": 8241 }, { "epoch": 0.08242, "grad_norm": 0.8211844129412678, "learning_rate": 0.003, "loss": 4.0748, "step": 8242 }, { "epoch": 0.08243, "grad_norm": 0.941400186703893, "learning_rate": 0.003, "loss": 4.1256, "step": 8243 }, { "epoch": 0.08244, "grad_norm": 1.147358753730803, "learning_rate": 0.003, "loss": 4.1014, "step": 8244 }, { "epoch": 0.08245, "grad_norm": 1.0164893306751752, "learning_rate": 0.003, "loss": 4.0998, "step": 8245 }, { "epoch": 0.08246, "grad_norm": 0.8548549249810995, "learning_rate": 0.003, "loss": 4.0985, "step": 8246 }, { "epoch": 0.08247, "grad_norm": 0.7706214458931752, "learning_rate": 0.003, "loss": 4.1073, "step": 8247 }, { "epoch": 0.08248, "grad_norm": 0.8475424364775245, "learning_rate": 0.003, "loss": 4.0948, "step": 8248 }, { "epoch": 0.08249, "grad_norm": 0.8469414618345413, "learning_rate": 0.003, "loss": 4.1146, "step": 8249 }, { "epoch": 0.0825, "grad_norm": 0.8229434139477997, "learning_rate": 0.003, "loss": 4.0883, "step": 8250 }, { "epoch": 0.08251, "grad_norm": 0.8011704320030776, "learning_rate": 0.003, "loss": 4.1089, "step": 8251 }, { "epoch": 0.08252, "grad_norm": 0.6866093619498361, "learning_rate": 0.003, "loss": 4.1101, "step": 8252 }, { "epoch": 0.08253, "grad_norm": 0.7062240053088432, "learning_rate": 0.003, "loss": 4.1009, "step": 8253 }, { "epoch": 0.08254, "grad_norm": 0.6263882996334876, "learning_rate": 0.003, "loss": 4.079, "step": 8254 }, { "epoch": 0.08255, "grad_norm": 0.6406212013874797, "learning_rate": 0.003, "loss": 4.0786, "step": 8255 }, { "epoch": 0.08256, "grad_norm": 0.6332253598165484, "learning_rate": 0.003, "loss": 4.1104, "step": 8256 }, { "epoch": 0.08257, "grad_norm": 0.7298881894270829, "learning_rate": 0.003, "loss": 4.0852, "step": 8257 }, { "epoch": 0.08258, "grad_norm": 0.8061858783513308, "learning_rate": 0.003, "loss": 4.0887, "step": 8258 }, { "epoch": 0.08259, "grad_norm": 0.9631605130652663, "learning_rate": 0.003, "loss": 4.1326, "step": 8259 }, { "epoch": 0.0826, "grad_norm": 1.312679616746302, "learning_rate": 0.003, "loss": 4.0868, "step": 8260 }, { "epoch": 0.08261, "grad_norm": 0.7878732930444392, "learning_rate": 0.003, "loss": 4.0916, "step": 8261 }, { "epoch": 0.08262, "grad_norm": 0.7074636050563899, "learning_rate": 0.003, "loss": 4.1289, "step": 8262 }, { "epoch": 0.08263, "grad_norm": 0.6997390004949057, "learning_rate": 0.003, "loss": 4.0925, "step": 8263 }, { "epoch": 0.08264, "grad_norm": 0.772169379776631, "learning_rate": 0.003, "loss": 4.1065, "step": 8264 }, { "epoch": 0.08265, "grad_norm": 0.7993877169518088, "learning_rate": 0.003, "loss": 4.0952, "step": 8265 }, { "epoch": 0.08266, "grad_norm": 1.0950952503503433, "learning_rate": 0.003, "loss": 4.105, "step": 8266 }, { "epoch": 0.08267, "grad_norm": 1.1929897335482988, "learning_rate": 0.003, "loss": 4.1079, "step": 8267 }, { "epoch": 0.08268, "grad_norm": 0.8778501205471867, "learning_rate": 0.003, "loss": 4.0854, "step": 8268 }, { "epoch": 0.08269, "grad_norm": 0.892710429777809, "learning_rate": 0.003, "loss": 4.1214, "step": 8269 }, { "epoch": 0.0827, "grad_norm": 1.0149545588000088, "learning_rate": 0.003, "loss": 4.1018, "step": 8270 }, { "epoch": 0.08271, "grad_norm": 1.124408719720435, "learning_rate": 0.003, "loss": 4.1285, "step": 8271 }, { "epoch": 0.08272, "grad_norm": 0.8266826354793325, "learning_rate": 0.003, "loss": 4.1071, "step": 8272 }, { "epoch": 0.08273, "grad_norm": 1.0123352772430743, "learning_rate": 0.003, "loss": 4.1082, "step": 8273 }, { "epoch": 0.08274, "grad_norm": 1.0192036235033302, "learning_rate": 0.003, "loss": 4.1246, "step": 8274 }, { "epoch": 0.08275, "grad_norm": 0.9102355182979404, "learning_rate": 0.003, "loss": 4.0994, "step": 8275 }, { "epoch": 0.08276, "grad_norm": 0.7090653355389134, "learning_rate": 0.003, "loss": 4.0851, "step": 8276 }, { "epoch": 0.08277, "grad_norm": 0.6881786586872917, "learning_rate": 0.003, "loss": 4.0832, "step": 8277 }, { "epoch": 0.08278, "grad_norm": 0.7699897377296708, "learning_rate": 0.003, "loss": 4.0967, "step": 8278 }, { "epoch": 0.08279, "grad_norm": 0.8358151974396222, "learning_rate": 0.003, "loss": 4.0957, "step": 8279 }, { "epoch": 0.0828, "grad_norm": 0.9018628671728572, "learning_rate": 0.003, "loss": 4.1249, "step": 8280 }, { "epoch": 0.08281, "grad_norm": 1.1343593814872228, "learning_rate": 0.003, "loss": 4.089, "step": 8281 }, { "epoch": 0.08282, "grad_norm": 0.9781923188910686, "learning_rate": 0.003, "loss": 4.0923, "step": 8282 }, { "epoch": 0.08283, "grad_norm": 0.9927329287124252, "learning_rate": 0.003, "loss": 4.1112, "step": 8283 }, { "epoch": 0.08284, "grad_norm": 0.98118308584172, "learning_rate": 0.003, "loss": 4.0933, "step": 8284 }, { "epoch": 0.08285, "grad_norm": 0.9263465527068231, "learning_rate": 0.003, "loss": 4.1095, "step": 8285 }, { "epoch": 0.08286, "grad_norm": 0.8673110587039227, "learning_rate": 0.003, "loss": 4.1219, "step": 8286 }, { "epoch": 0.08287, "grad_norm": 0.8419508037976696, "learning_rate": 0.003, "loss": 4.0988, "step": 8287 }, { "epoch": 0.08288, "grad_norm": 0.8220681636415317, "learning_rate": 0.003, "loss": 4.0823, "step": 8288 }, { "epoch": 0.08289, "grad_norm": 0.7505932152286665, "learning_rate": 0.003, "loss": 4.1028, "step": 8289 }, { "epoch": 0.0829, "grad_norm": 0.8010371678478732, "learning_rate": 0.003, "loss": 4.0791, "step": 8290 }, { "epoch": 0.08291, "grad_norm": 0.7956640617851198, "learning_rate": 0.003, "loss": 4.091, "step": 8291 }, { "epoch": 0.08292, "grad_norm": 0.9655748890689609, "learning_rate": 0.003, "loss": 4.0982, "step": 8292 }, { "epoch": 0.08293, "grad_norm": 1.2599943864429868, "learning_rate": 0.003, "loss": 4.1187, "step": 8293 }, { "epoch": 0.08294, "grad_norm": 0.9706360443217623, "learning_rate": 0.003, "loss": 4.0958, "step": 8294 }, { "epoch": 0.08295, "grad_norm": 1.048320155699331, "learning_rate": 0.003, "loss": 4.0989, "step": 8295 }, { "epoch": 0.08296, "grad_norm": 0.9595325746578597, "learning_rate": 0.003, "loss": 4.1017, "step": 8296 }, { "epoch": 0.08297, "grad_norm": 0.9381047024976223, "learning_rate": 0.003, "loss": 4.112, "step": 8297 }, { "epoch": 0.08298, "grad_norm": 0.863050580789861, "learning_rate": 0.003, "loss": 4.0817, "step": 8298 }, { "epoch": 0.08299, "grad_norm": 0.7240303327199505, "learning_rate": 0.003, "loss": 4.0939, "step": 8299 }, { "epoch": 0.083, "grad_norm": 0.7823461180480966, "learning_rate": 0.003, "loss": 4.1312, "step": 8300 }, { "epoch": 0.08301, "grad_norm": 0.8990430145286981, "learning_rate": 0.003, "loss": 4.1109, "step": 8301 }, { "epoch": 0.08302, "grad_norm": 0.938888405100819, "learning_rate": 0.003, "loss": 4.1236, "step": 8302 }, { "epoch": 0.08303, "grad_norm": 0.8734564423642468, "learning_rate": 0.003, "loss": 4.0935, "step": 8303 }, { "epoch": 0.08304, "grad_norm": 0.8158703165239903, "learning_rate": 0.003, "loss": 4.1239, "step": 8304 }, { "epoch": 0.08305, "grad_norm": 0.75620484767062, "learning_rate": 0.003, "loss": 4.0936, "step": 8305 }, { "epoch": 0.08306, "grad_norm": 0.6540375187690641, "learning_rate": 0.003, "loss": 4.1012, "step": 8306 }, { "epoch": 0.08307, "grad_norm": 0.7220457038710596, "learning_rate": 0.003, "loss": 4.0882, "step": 8307 }, { "epoch": 0.08308, "grad_norm": 0.7585842959907338, "learning_rate": 0.003, "loss": 4.0727, "step": 8308 }, { "epoch": 0.08309, "grad_norm": 0.7562956967912909, "learning_rate": 0.003, "loss": 4.0675, "step": 8309 }, { "epoch": 0.0831, "grad_norm": 0.8620656108784794, "learning_rate": 0.003, "loss": 4.0958, "step": 8310 }, { "epoch": 0.08311, "grad_norm": 1.0392515900505281, "learning_rate": 0.003, "loss": 4.0976, "step": 8311 }, { "epoch": 0.08312, "grad_norm": 1.1358410812232829, "learning_rate": 0.003, "loss": 4.1248, "step": 8312 }, { "epoch": 0.08313, "grad_norm": 0.8856911622270947, "learning_rate": 0.003, "loss": 4.0815, "step": 8313 }, { "epoch": 0.08314, "grad_norm": 0.8971402484163674, "learning_rate": 0.003, "loss": 4.1155, "step": 8314 }, { "epoch": 0.08315, "grad_norm": 0.9089314400577564, "learning_rate": 0.003, "loss": 4.0705, "step": 8315 }, { "epoch": 0.08316, "grad_norm": 0.9935520424178989, "learning_rate": 0.003, "loss": 4.095, "step": 8316 }, { "epoch": 0.08317, "grad_norm": 0.9643025611664917, "learning_rate": 0.003, "loss": 4.0928, "step": 8317 }, { "epoch": 0.08318, "grad_norm": 0.9787470048990922, "learning_rate": 0.003, "loss": 4.0841, "step": 8318 }, { "epoch": 0.08319, "grad_norm": 1.196041471896753, "learning_rate": 0.003, "loss": 4.0865, "step": 8319 }, { "epoch": 0.0832, "grad_norm": 0.7528163771935631, "learning_rate": 0.003, "loss": 4.0777, "step": 8320 }, { "epoch": 0.08321, "grad_norm": 0.6113115162451314, "learning_rate": 0.003, "loss": 4.1063, "step": 8321 }, { "epoch": 0.08322, "grad_norm": 0.6302210737347822, "learning_rate": 0.003, "loss": 4.0847, "step": 8322 }, { "epoch": 0.08323, "grad_norm": 0.6846448753101897, "learning_rate": 0.003, "loss": 4.0717, "step": 8323 }, { "epoch": 0.08324, "grad_norm": 0.81282422542553, "learning_rate": 0.003, "loss": 4.0976, "step": 8324 }, { "epoch": 0.08325, "grad_norm": 0.9233311436381315, "learning_rate": 0.003, "loss": 4.1047, "step": 8325 }, { "epoch": 0.08326, "grad_norm": 0.954642418929674, "learning_rate": 0.003, "loss": 4.0934, "step": 8326 }, { "epoch": 0.08327, "grad_norm": 0.8277691448246721, "learning_rate": 0.003, "loss": 4.0862, "step": 8327 }, { "epoch": 0.08328, "grad_norm": 0.7651088353198385, "learning_rate": 0.003, "loss": 4.0909, "step": 8328 }, { "epoch": 0.08329, "grad_norm": 0.8129905787749772, "learning_rate": 0.003, "loss": 4.0847, "step": 8329 }, { "epoch": 0.0833, "grad_norm": 0.7705105903193307, "learning_rate": 0.003, "loss": 4.0982, "step": 8330 }, { "epoch": 0.08331, "grad_norm": 0.8783516141923371, "learning_rate": 0.003, "loss": 4.0827, "step": 8331 }, { "epoch": 0.08332, "grad_norm": 0.9500635724943229, "learning_rate": 0.003, "loss": 4.1073, "step": 8332 }, { "epoch": 0.08333, "grad_norm": 1.0022282494956223, "learning_rate": 0.003, "loss": 4.1082, "step": 8333 }, { "epoch": 0.08334, "grad_norm": 0.9868081102540992, "learning_rate": 0.003, "loss": 4.1067, "step": 8334 }, { "epoch": 0.08335, "grad_norm": 0.9702541505884114, "learning_rate": 0.003, "loss": 4.0996, "step": 8335 }, { "epoch": 0.08336, "grad_norm": 0.9616697241003944, "learning_rate": 0.003, "loss": 4.1271, "step": 8336 }, { "epoch": 0.08337, "grad_norm": 0.9197392338204791, "learning_rate": 0.003, "loss": 4.1158, "step": 8337 }, { "epoch": 0.08338, "grad_norm": 0.9953727673941519, "learning_rate": 0.003, "loss": 4.1025, "step": 8338 }, { "epoch": 0.08339, "grad_norm": 1.0143098515036326, "learning_rate": 0.003, "loss": 4.1025, "step": 8339 }, { "epoch": 0.0834, "grad_norm": 0.8294567785378771, "learning_rate": 0.003, "loss": 4.0963, "step": 8340 }, { "epoch": 0.08341, "grad_norm": 0.6099348529213143, "learning_rate": 0.003, "loss": 4.1201, "step": 8341 }, { "epoch": 0.08342, "grad_norm": 0.7224131427612864, "learning_rate": 0.003, "loss": 4.0682, "step": 8342 }, { "epoch": 0.08343, "grad_norm": 0.8439433910256732, "learning_rate": 0.003, "loss": 4.0751, "step": 8343 }, { "epoch": 0.08344, "grad_norm": 0.9868340222493676, "learning_rate": 0.003, "loss": 4.0964, "step": 8344 }, { "epoch": 0.08345, "grad_norm": 0.9401831390198352, "learning_rate": 0.003, "loss": 4.0864, "step": 8345 }, { "epoch": 0.08346, "grad_norm": 1.0302321487078914, "learning_rate": 0.003, "loss": 4.1087, "step": 8346 }, { "epoch": 0.08347, "grad_norm": 0.8650226664698788, "learning_rate": 0.003, "loss": 4.1029, "step": 8347 }, { "epoch": 0.08348, "grad_norm": 0.7651090535509265, "learning_rate": 0.003, "loss": 4.0613, "step": 8348 }, { "epoch": 0.08349, "grad_norm": 0.804167869373704, "learning_rate": 0.003, "loss": 4.1046, "step": 8349 }, { "epoch": 0.0835, "grad_norm": 0.907923736734775, "learning_rate": 0.003, "loss": 4.1326, "step": 8350 }, { "epoch": 0.08351, "grad_norm": 0.9166006948131097, "learning_rate": 0.003, "loss": 4.121, "step": 8351 }, { "epoch": 0.08352, "grad_norm": 0.9742286942493177, "learning_rate": 0.003, "loss": 4.1174, "step": 8352 }, { "epoch": 0.08353, "grad_norm": 1.0443086898250653, "learning_rate": 0.003, "loss": 4.1032, "step": 8353 }, { "epoch": 0.08354, "grad_norm": 1.0482003041090164, "learning_rate": 0.003, "loss": 4.1117, "step": 8354 }, { "epoch": 0.08355, "grad_norm": 0.9948528176450955, "learning_rate": 0.003, "loss": 4.0841, "step": 8355 }, { "epoch": 0.08356, "grad_norm": 0.9930403012130316, "learning_rate": 0.003, "loss": 4.1109, "step": 8356 }, { "epoch": 0.08357, "grad_norm": 1.058290503425188, "learning_rate": 0.003, "loss": 4.12, "step": 8357 }, { "epoch": 0.08358, "grad_norm": 0.7888495179404684, "learning_rate": 0.003, "loss": 4.1032, "step": 8358 }, { "epoch": 0.08359, "grad_norm": 0.7764006655298166, "learning_rate": 0.003, "loss": 4.0751, "step": 8359 }, { "epoch": 0.0836, "grad_norm": 0.9269785078441903, "learning_rate": 0.003, "loss": 4.0881, "step": 8360 }, { "epoch": 0.08361, "grad_norm": 1.0448000920569085, "learning_rate": 0.003, "loss": 4.1102, "step": 8361 }, { "epoch": 0.08362, "grad_norm": 1.0677595443672623, "learning_rate": 0.003, "loss": 4.1246, "step": 8362 }, { "epoch": 0.08363, "grad_norm": 0.9762108759601814, "learning_rate": 0.003, "loss": 4.1332, "step": 8363 }, { "epoch": 0.08364, "grad_norm": 0.9250190436823729, "learning_rate": 0.003, "loss": 4.0939, "step": 8364 }, { "epoch": 0.08365, "grad_norm": 0.6667015272636464, "learning_rate": 0.003, "loss": 4.1102, "step": 8365 }, { "epoch": 0.08366, "grad_norm": 0.6845211638503894, "learning_rate": 0.003, "loss": 4.1198, "step": 8366 }, { "epoch": 0.08367, "grad_norm": 0.5939518428147165, "learning_rate": 0.003, "loss": 4.092, "step": 8367 }, { "epoch": 0.08368, "grad_norm": 0.5996368389783523, "learning_rate": 0.003, "loss": 4.1098, "step": 8368 }, { "epoch": 0.08369, "grad_norm": 0.6523282835388826, "learning_rate": 0.003, "loss": 4.0961, "step": 8369 }, { "epoch": 0.0837, "grad_norm": 0.8159208873071091, "learning_rate": 0.003, "loss": 4.0989, "step": 8370 }, { "epoch": 0.08371, "grad_norm": 0.9860834398447139, "learning_rate": 0.003, "loss": 4.11, "step": 8371 }, { "epoch": 0.08372, "grad_norm": 1.0940596921770058, "learning_rate": 0.003, "loss": 4.1339, "step": 8372 }, { "epoch": 0.08373, "grad_norm": 0.9442033751036637, "learning_rate": 0.003, "loss": 4.1091, "step": 8373 }, { "epoch": 0.08374, "grad_norm": 0.8663695859388787, "learning_rate": 0.003, "loss": 4.0856, "step": 8374 }, { "epoch": 0.08375, "grad_norm": 0.7415517737245889, "learning_rate": 0.003, "loss": 4.087, "step": 8375 }, { "epoch": 0.08376, "grad_norm": 0.8577754980749478, "learning_rate": 0.003, "loss": 4.0951, "step": 8376 }, { "epoch": 0.08377, "grad_norm": 1.0199862392503956, "learning_rate": 0.003, "loss": 4.0757, "step": 8377 }, { "epoch": 0.08378, "grad_norm": 1.2449008964290007, "learning_rate": 0.003, "loss": 4.1124, "step": 8378 }, { "epoch": 0.08379, "grad_norm": 0.9419961904078284, "learning_rate": 0.003, "loss": 4.0992, "step": 8379 }, { "epoch": 0.0838, "grad_norm": 0.9566083866982931, "learning_rate": 0.003, "loss": 4.1002, "step": 8380 }, { "epoch": 0.08381, "grad_norm": 0.921682176354393, "learning_rate": 0.003, "loss": 4.1022, "step": 8381 }, { "epoch": 0.08382, "grad_norm": 0.8446653628794919, "learning_rate": 0.003, "loss": 4.1172, "step": 8382 }, { "epoch": 0.08383, "grad_norm": 0.817980279883183, "learning_rate": 0.003, "loss": 4.0787, "step": 8383 }, { "epoch": 0.08384, "grad_norm": 0.7858769081287543, "learning_rate": 0.003, "loss": 4.0828, "step": 8384 }, { "epoch": 0.08385, "grad_norm": 0.7428564824532166, "learning_rate": 0.003, "loss": 4.0764, "step": 8385 }, { "epoch": 0.08386, "grad_norm": 0.7507352447976503, "learning_rate": 0.003, "loss": 4.0891, "step": 8386 }, { "epoch": 0.08387, "grad_norm": 0.8467430532110711, "learning_rate": 0.003, "loss": 4.0639, "step": 8387 }, { "epoch": 0.08388, "grad_norm": 0.9680594439287274, "learning_rate": 0.003, "loss": 4.0662, "step": 8388 }, { "epoch": 0.08389, "grad_norm": 1.2252522777923969, "learning_rate": 0.003, "loss": 4.1204, "step": 8389 }, { "epoch": 0.0839, "grad_norm": 0.7533692382713882, "learning_rate": 0.003, "loss": 4.0747, "step": 8390 }, { "epoch": 0.08391, "grad_norm": 0.578854889997608, "learning_rate": 0.003, "loss": 4.108, "step": 8391 }, { "epoch": 0.08392, "grad_norm": 0.7310406618044913, "learning_rate": 0.003, "loss": 4.078, "step": 8392 }, { "epoch": 0.08393, "grad_norm": 0.8080163613542798, "learning_rate": 0.003, "loss": 4.1083, "step": 8393 }, { "epoch": 0.08394, "grad_norm": 0.9313902145452904, "learning_rate": 0.003, "loss": 4.0701, "step": 8394 }, { "epoch": 0.08395, "grad_norm": 0.8774904667389832, "learning_rate": 0.003, "loss": 4.0802, "step": 8395 }, { "epoch": 0.08396, "grad_norm": 0.7336808643576519, "learning_rate": 0.003, "loss": 4.1099, "step": 8396 }, { "epoch": 0.08397, "grad_norm": 0.6526993931050207, "learning_rate": 0.003, "loss": 4.0795, "step": 8397 }, { "epoch": 0.08398, "grad_norm": 0.6704748477002462, "learning_rate": 0.003, "loss": 4.0987, "step": 8398 }, { "epoch": 0.08399, "grad_norm": 0.7009862189779927, "learning_rate": 0.003, "loss": 4.0861, "step": 8399 }, { "epoch": 0.084, "grad_norm": 0.7959197464027524, "learning_rate": 0.003, "loss": 4.0663, "step": 8400 }, { "epoch": 0.08401, "grad_norm": 0.7942806861956023, "learning_rate": 0.003, "loss": 4.0766, "step": 8401 }, { "epoch": 0.08402, "grad_norm": 0.7901087516905125, "learning_rate": 0.003, "loss": 4.0736, "step": 8402 }, { "epoch": 0.08403, "grad_norm": 0.7878285742447195, "learning_rate": 0.003, "loss": 4.0848, "step": 8403 }, { "epoch": 0.08404, "grad_norm": 0.760912884794821, "learning_rate": 0.003, "loss": 4.0789, "step": 8404 }, { "epoch": 0.08405, "grad_norm": 0.9006130071518619, "learning_rate": 0.003, "loss": 4.077, "step": 8405 }, { "epoch": 0.08406, "grad_norm": 1.0016144008572414, "learning_rate": 0.003, "loss": 4.0696, "step": 8406 }, { "epoch": 0.08407, "grad_norm": 1.3443814703857433, "learning_rate": 0.003, "loss": 4.1531, "step": 8407 }, { "epoch": 0.08408, "grad_norm": 0.9056353298369414, "learning_rate": 0.003, "loss": 4.0704, "step": 8408 }, { "epoch": 0.08409, "grad_norm": 0.8063158095511546, "learning_rate": 0.003, "loss": 4.111, "step": 8409 }, { "epoch": 0.0841, "grad_norm": 0.9219305008449818, "learning_rate": 0.003, "loss": 4.1093, "step": 8410 }, { "epoch": 0.08411, "grad_norm": 1.1026094058352998, "learning_rate": 0.003, "loss": 4.1366, "step": 8411 }, { "epoch": 0.08412, "grad_norm": 0.9534553488792078, "learning_rate": 0.003, "loss": 4.114, "step": 8412 }, { "epoch": 0.08413, "grad_norm": 0.942913373390524, "learning_rate": 0.003, "loss": 4.074, "step": 8413 }, { "epoch": 0.08414, "grad_norm": 1.0244828961575672, "learning_rate": 0.003, "loss": 4.1186, "step": 8414 }, { "epoch": 0.08415, "grad_norm": 0.9400339571658545, "learning_rate": 0.003, "loss": 4.0761, "step": 8415 }, { "epoch": 0.08416, "grad_norm": 0.8710315727738005, "learning_rate": 0.003, "loss": 4.1112, "step": 8416 }, { "epoch": 0.08417, "grad_norm": 0.7950437016706896, "learning_rate": 0.003, "loss": 4.1064, "step": 8417 }, { "epoch": 0.08418, "grad_norm": 0.8098581252447725, "learning_rate": 0.003, "loss": 4.0923, "step": 8418 }, { "epoch": 0.08419, "grad_norm": 0.9057985377788469, "learning_rate": 0.003, "loss": 4.1036, "step": 8419 }, { "epoch": 0.0842, "grad_norm": 0.9778312033623336, "learning_rate": 0.003, "loss": 4.0894, "step": 8420 }, { "epoch": 0.08421, "grad_norm": 0.8982644728463484, "learning_rate": 0.003, "loss": 4.0799, "step": 8421 }, { "epoch": 0.08422, "grad_norm": 0.9376171311714971, "learning_rate": 0.003, "loss": 4.1191, "step": 8422 }, { "epoch": 0.08423, "grad_norm": 0.9399936667089988, "learning_rate": 0.003, "loss": 4.1017, "step": 8423 }, { "epoch": 0.08424, "grad_norm": 0.8765907774353715, "learning_rate": 0.003, "loss": 4.0948, "step": 8424 }, { "epoch": 0.08425, "grad_norm": 0.8093336331696083, "learning_rate": 0.003, "loss": 4.1123, "step": 8425 }, { "epoch": 0.08426, "grad_norm": 0.7177444716043226, "learning_rate": 0.003, "loss": 4.092, "step": 8426 }, { "epoch": 0.08427, "grad_norm": 0.8420947027796308, "learning_rate": 0.003, "loss": 4.1152, "step": 8427 }, { "epoch": 0.08428, "grad_norm": 1.0570206327016962, "learning_rate": 0.003, "loss": 4.103, "step": 8428 }, { "epoch": 0.08429, "grad_norm": 1.0693132771916776, "learning_rate": 0.003, "loss": 4.0961, "step": 8429 }, { "epoch": 0.0843, "grad_norm": 1.037364188618762, "learning_rate": 0.003, "loss": 4.0748, "step": 8430 }, { "epoch": 0.08431, "grad_norm": 0.9849908084466207, "learning_rate": 0.003, "loss": 4.0895, "step": 8431 }, { "epoch": 0.08432, "grad_norm": 0.9291586130000788, "learning_rate": 0.003, "loss": 4.0753, "step": 8432 }, { "epoch": 0.08433, "grad_norm": 0.865132194731464, "learning_rate": 0.003, "loss": 4.1126, "step": 8433 }, { "epoch": 0.08434, "grad_norm": 0.8122049399169294, "learning_rate": 0.003, "loss": 4.0646, "step": 8434 }, { "epoch": 0.08435, "grad_norm": 0.747566312505787, "learning_rate": 0.003, "loss": 4.0972, "step": 8435 }, { "epoch": 0.08436, "grad_norm": 0.7617380609411367, "learning_rate": 0.003, "loss": 4.0959, "step": 8436 }, { "epoch": 0.08437, "grad_norm": 0.7640674198661149, "learning_rate": 0.003, "loss": 4.108, "step": 8437 }, { "epoch": 0.08438, "grad_norm": 0.7897561671508848, "learning_rate": 0.003, "loss": 4.0912, "step": 8438 }, { "epoch": 0.08439, "grad_norm": 0.9805961893177073, "learning_rate": 0.003, "loss": 4.1115, "step": 8439 }, { "epoch": 0.0844, "grad_norm": 1.2397471986974102, "learning_rate": 0.003, "loss": 4.1255, "step": 8440 }, { "epoch": 0.08441, "grad_norm": 0.7657531909009802, "learning_rate": 0.003, "loss": 4.0912, "step": 8441 }, { "epoch": 0.08442, "grad_norm": 0.6761237170315371, "learning_rate": 0.003, "loss": 4.0768, "step": 8442 }, { "epoch": 0.08443, "grad_norm": 0.6698413991893777, "learning_rate": 0.003, "loss": 4.0963, "step": 8443 }, { "epoch": 0.08444, "grad_norm": 0.6538638151947389, "learning_rate": 0.003, "loss": 4.1241, "step": 8444 }, { "epoch": 0.08445, "grad_norm": 0.7819289887319408, "learning_rate": 0.003, "loss": 4.0758, "step": 8445 }, { "epoch": 0.08446, "grad_norm": 0.7360808672824343, "learning_rate": 0.003, "loss": 4.095, "step": 8446 }, { "epoch": 0.08447, "grad_norm": 0.8192930714654896, "learning_rate": 0.003, "loss": 4.1053, "step": 8447 }, { "epoch": 0.08448, "grad_norm": 0.9656464757675268, "learning_rate": 0.003, "loss": 4.109, "step": 8448 }, { "epoch": 0.08449, "grad_norm": 1.1936151486401498, "learning_rate": 0.003, "loss": 4.1264, "step": 8449 }, { "epoch": 0.0845, "grad_norm": 0.685549098243575, "learning_rate": 0.003, "loss": 4.0833, "step": 8450 }, { "epoch": 0.08451, "grad_norm": 0.9139246196778028, "learning_rate": 0.003, "loss": 4.1179, "step": 8451 }, { "epoch": 0.08452, "grad_norm": 1.2202645871735718, "learning_rate": 0.003, "loss": 4.1024, "step": 8452 }, { "epoch": 0.08453, "grad_norm": 1.011747247457234, "learning_rate": 0.003, "loss": 4.1189, "step": 8453 }, { "epoch": 0.08454, "grad_norm": 1.0262226366034506, "learning_rate": 0.003, "loss": 4.1133, "step": 8454 }, { "epoch": 0.08455, "grad_norm": 1.0099082871033074, "learning_rate": 0.003, "loss": 4.0874, "step": 8455 }, { "epoch": 0.08456, "grad_norm": 0.90849087232309, "learning_rate": 0.003, "loss": 4.0955, "step": 8456 }, { "epoch": 0.08457, "grad_norm": 0.8383502937124733, "learning_rate": 0.003, "loss": 4.1304, "step": 8457 }, { "epoch": 0.08458, "grad_norm": 0.8440455792853685, "learning_rate": 0.003, "loss": 4.1004, "step": 8458 }, { "epoch": 0.08459, "grad_norm": 0.7112059922093875, "learning_rate": 0.003, "loss": 4.0786, "step": 8459 }, { "epoch": 0.0846, "grad_norm": 0.649515091750979, "learning_rate": 0.003, "loss": 4.1027, "step": 8460 }, { "epoch": 0.08461, "grad_norm": 0.6333241585768099, "learning_rate": 0.003, "loss": 4.107, "step": 8461 }, { "epoch": 0.08462, "grad_norm": 0.6384025767344915, "learning_rate": 0.003, "loss": 4.105, "step": 8462 }, { "epoch": 0.08463, "grad_norm": 0.6400437078939833, "learning_rate": 0.003, "loss": 4.0742, "step": 8463 }, { "epoch": 0.08464, "grad_norm": 0.6759399211580148, "learning_rate": 0.003, "loss": 4.1134, "step": 8464 }, { "epoch": 0.08465, "grad_norm": 0.7413271665798129, "learning_rate": 0.003, "loss": 4.0935, "step": 8465 }, { "epoch": 0.08466, "grad_norm": 0.8771869007648114, "learning_rate": 0.003, "loss": 4.1166, "step": 8466 }, { "epoch": 0.08467, "grad_norm": 0.9227181671457924, "learning_rate": 0.003, "loss": 4.1193, "step": 8467 }, { "epoch": 0.08468, "grad_norm": 0.9695793195002803, "learning_rate": 0.003, "loss": 4.1123, "step": 8468 }, { "epoch": 0.08469, "grad_norm": 1.2889696763476837, "learning_rate": 0.003, "loss": 4.1225, "step": 8469 }, { "epoch": 0.0847, "grad_norm": 0.9448980455530938, "learning_rate": 0.003, "loss": 4.0985, "step": 8470 }, { "epoch": 0.08471, "grad_norm": 1.1057721964654461, "learning_rate": 0.003, "loss": 4.087, "step": 8471 }, { "epoch": 0.08472, "grad_norm": 1.1484149282990468, "learning_rate": 0.003, "loss": 4.0923, "step": 8472 }, { "epoch": 0.08473, "grad_norm": 1.0503023171846668, "learning_rate": 0.003, "loss": 4.0876, "step": 8473 }, { "epoch": 0.08474, "grad_norm": 0.8628642552887439, "learning_rate": 0.003, "loss": 4.0692, "step": 8474 }, { "epoch": 0.08475, "grad_norm": 0.7509423112660792, "learning_rate": 0.003, "loss": 4.0941, "step": 8475 }, { "epoch": 0.08476, "grad_norm": 0.70961640106593, "learning_rate": 0.003, "loss": 4.1127, "step": 8476 }, { "epoch": 0.08477, "grad_norm": 0.7231357262197663, "learning_rate": 0.003, "loss": 4.0945, "step": 8477 }, { "epoch": 0.08478, "grad_norm": 0.804332121451016, "learning_rate": 0.003, "loss": 4.0883, "step": 8478 }, { "epoch": 0.08479, "grad_norm": 0.9887354629105892, "learning_rate": 0.003, "loss": 4.1141, "step": 8479 }, { "epoch": 0.0848, "grad_norm": 0.9881963393542088, "learning_rate": 0.003, "loss": 4.1153, "step": 8480 }, { "epoch": 0.08481, "grad_norm": 0.9379461518994493, "learning_rate": 0.003, "loss": 4.0825, "step": 8481 }, { "epoch": 0.08482, "grad_norm": 0.7805227298805099, "learning_rate": 0.003, "loss": 4.1036, "step": 8482 }, { "epoch": 0.08483, "grad_norm": 0.7856593793741489, "learning_rate": 0.003, "loss": 4.0977, "step": 8483 }, { "epoch": 0.08484, "grad_norm": 0.8932525879434696, "learning_rate": 0.003, "loss": 4.0785, "step": 8484 }, { "epoch": 0.08485, "grad_norm": 0.8851276814593144, "learning_rate": 0.003, "loss": 4.1077, "step": 8485 }, { "epoch": 0.08486, "grad_norm": 0.8944513894921937, "learning_rate": 0.003, "loss": 4.1289, "step": 8486 }, { "epoch": 0.08487, "grad_norm": 1.0310888734066477, "learning_rate": 0.003, "loss": 4.1311, "step": 8487 }, { "epoch": 0.08488, "grad_norm": 1.0686836420532142, "learning_rate": 0.003, "loss": 4.074, "step": 8488 }, { "epoch": 0.08489, "grad_norm": 1.0317207317630044, "learning_rate": 0.003, "loss": 4.1129, "step": 8489 }, { "epoch": 0.0849, "grad_norm": 1.1495485663194194, "learning_rate": 0.003, "loss": 4.1236, "step": 8490 }, { "epoch": 0.08491, "grad_norm": 1.0263416248655877, "learning_rate": 0.003, "loss": 4.1202, "step": 8491 }, { "epoch": 0.08492, "grad_norm": 0.9700428254885138, "learning_rate": 0.003, "loss": 4.1051, "step": 8492 }, { "epoch": 0.08493, "grad_norm": 0.9948556669000941, "learning_rate": 0.003, "loss": 4.1167, "step": 8493 }, { "epoch": 0.08494, "grad_norm": 0.910141922117321, "learning_rate": 0.003, "loss": 4.0986, "step": 8494 }, { "epoch": 0.08495, "grad_norm": 0.9597026790665053, "learning_rate": 0.003, "loss": 4.1105, "step": 8495 }, { "epoch": 0.08496, "grad_norm": 0.9275785612350264, "learning_rate": 0.003, "loss": 4.0845, "step": 8496 }, { "epoch": 0.08497, "grad_norm": 1.01867706095219, "learning_rate": 0.003, "loss": 4.1037, "step": 8497 }, { "epoch": 0.08498, "grad_norm": 1.1590197748031095, "learning_rate": 0.003, "loss": 4.118, "step": 8498 }, { "epoch": 0.08499, "grad_norm": 0.9880096011902544, "learning_rate": 0.003, "loss": 4.1334, "step": 8499 }, { "epoch": 0.085, "grad_norm": 0.8266327675260843, "learning_rate": 0.003, "loss": 4.0668, "step": 8500 }, { "epoch": 0.08501, "grad_norm": 0.7411428596681484, "learning_rate": 0.003, "loss": 4.0909, "step": 8501 }, { "epoch": 0.08502, "grad_norm": 0.6949138984131208, "learning_rate": 0.003, "loss": 4.0995, "step": 8502 }, { "epoch": 0.08503, "grad_norm": 0.7536288112312667, "learning_rate": 0.003, "loss": 4.1006, "step": 8503 }, { "epoch": 0.08504, "grad_norm": 0.7223239288762903, "learning_rate": 0.003, "loss": 4.1265, "step": 8504 }, { "epoch": 0.08505, "grad_norm": 0.675765276936943, "learning_rate": 0.003, "loss": 4.0868, "step": 8505 }, { "epoch": 0.08506, "grad_norm": 0.7576978694249362, "learning_rate": 0.003, "loss": 4.1066, "step": 8506 }, { "epoch": 0.08507, "grad_norm": 0.8515649801971872, "learning_rate": 0.003, "loss": 4.0762, "step": 8507 }, { "epoch": 0.08508, "grad_norm": 1.057062705401568, "learning_rate": 0.003, "loss": 4.088, "step": 8508 }, { "epoch": 0.08509, "grad_norm": 1.121630687562224, "learning_rate": 0.003, "loss": 4.1326, "step": 8509 }, { "epoch": 0.0851, "grad_norm": 0.9104321815273234, "learning_rate": 0.003, "loss": 4.1003, "step": 8510 }, { "epoch": 0.08511, "grad_norm": 0.8342488080751641, "learning_rate": 0.003, "loss": 4.1064, "step": 8511 }, { "epoch": 0.08512, "grad_norm": 0.7988070005118341, "learning_rate": 0.003, "loss": 4.1328, "step": 8512 }, { "epoch": 0.08513, "grad_norm": 0.799837965249621, "learning_rate": 0.003, "loss": 4.0938, "step": 8513 }, { "epoch": 0.08514, "grad_norm": 0.835432784668365, "learning_rate": 0.003, "loss": 4.1025, "step": 8514 }, { "epoch": 0.08515, "grad_norm": 0.8955465769825964, "learning_rate": 0.003, "loss": 4.0935, "step": 8515 }, { "epoch": 0.08516, "grad_norm": 0.8928688903157641, "learning_rate": 0.003, "loss": 4.1252, "step": 8516 }, { "epoch": 0.08517, "grad_norm": 0.8226476531644744, "learning_rate": 0.003, "loss": 4.0897, "step": 8517 }, { "epoch": 0.08518, "grad_norm": 0.9561707034998627, "learning_rate": 0.003, "loss": 4.0875, "step": 8518 }, { "epoch": 0.08519, "grad_norm": 1.1902153338579717, "learning_rate": 0.003, "loss": 4.0934, "step": 8519 }, { "epoch": 0.0852, "grad_norm": 0.8018095905157278, "learning_rate": 0.003, "loss": 4.0673, "step": 8520 }, { "epoch": 0.08521, "grad_norm": 0.5533864795410459, "learning_rate": 0.003, "loss": 4.1046, "step": 8521 }, { "epoch": 0.08522, "grad_norm": 0.7791152111447774, "learning_rate": 0.003, "loss": 4.0636, "step": 8522 }, { "epoch": 0.08523, "grad_norm": 0.900500602043729, "learning_rate": 0.003, "loss": 4.0839, "step": 8523 }, { "epoch": 0.08524, "grad_norm": 0.9074705833253267, "learning_rate": 0.003, "loss": 4.0906, "step": 8524 }, { "epoch": 0.08525, "grad_norm": 0.9112667601876601, "learning_rate": 0.003, "loss": 4.0881, "step": 8525 }, { "epoch": 0.08526, "grad_norm": 1.0212625794257977, "learning_rate": 0.003, "loss": 4.0955, "step": 8526 }, { "epoch": 0.08527, "grad_norm": 1.0584452877128767, "learning_rate": 0.003, "loss": 4.1014, "step": 8527 }, { "epoch": 0.08528, "grad_norm": 0.8561020391089984, "learning_rate": 0.003, "loss": 4.0806, "step": 8528 }, { "epoch": 0.08529, "grad_norm": 0.8298501875520379, "learning_rate": 0.003, "loss": 4.106, "step": 8529 }, { "epoch": 0.0853, "grad_norm": 0.9089572166905378, "learning_rate": 0.003, "loss": 4.1068, "step": 8530 }, { "epoch": 0.08531, "grad_norm": 0.8653272986614832, "learning_rate": 0.003, "loss": 4.0777, "step": 8531 }, { "epoch": 0.08532, "grad_norm": 0.788926984023568, "learning_rate": 0.003, "loss": 4.0841, "step": 8532 }, { "epoch": 0.08533, "grad_norm": 0.8903035626600532, "learning_rate": 0.003, "loss": 4.117, "step": 8533 }, { "epoch": 0.08534, "grad_norm": 0.963034125025281, "learning_rate": 0.003, "loss": 4.115, "step": 8534 }, { "epoch": 0.08535, "grad_norm": 1.2879128764669079, "learning_rate": 0.003, "loss": 4.1075, "step": 8535 }, { "epoch": 0.08536, "grad_norm": 0.8902797022173426, "learning_rate": 0.003, "loss": 4.1148, "step": 8536 }, { "epoch": 0.08537, "grad_norm": 0.8657020447888398, "learning_rate": 0.003, "loss": 4.0805, "step": 8537 }, { "epoch": 0.08538, "grad_norm": 0.849426185382095, "learning_rate": 0.003, "loss": 4.0868, "step": 8538 }, { "epoch": 0.08539, "grad_norm": 0.8702155984928356, "learning_rate": 0.003, "loss": 4.1264, "step": 8539 }, { "epoch": 0.0854, "grad_norm": 0.7764427649934481, "learning_rate": 0.003, "loss": 4.0733, "step": 8540 }, { "epoch": 0.08541, "grad_norm": 0.7058501386903097, "learning_rate": 0.003, "loss": 4.081, "step": 8541 }, { "epoch": 0.08542, "grad_norm": 0.8663633824046528, "learning_rate": 0.003, "loss": 4.1205, "step": 8542 }, { "epoch": 0.08543, "grad_norm": 0.906265069337281, "learning_rate": 0.003, "loss": 4.1076, "step": 8543 }, { "epoch": 0.08544, "grad_norm": 0.9275930195253327, "learning_rate": 0.003, "loss": 4.089, "step": 8544 }, { "epoch": 0.08545, "grad_norm": 0.8078784033785711, "learning_rate": 0.003, "loss": 4.0949, "step": 8545 }, { "epoch": 0.08546, "grad_norm": 0.9066354249548785, "learning_rate": 0.003, "loss": 4.1001, "step": 8546 }, { "epoch": 0.08547, "grad_norm": 1.3364123020627683, "learning_rate": 0.003, "loss": 4.1259, "step": 8547 }, { "epoch": 0.08548, "grad_norm": 0.9435393460362433, "learning_rate": 0.003, "loss": 4.1209, "step": 8548 }, { "epoch": 0.08549, "grad_norm": 0.9332959081861278, "learning_rate": 0.003, "loss": 4.1112, "step": 8549 }, { "epoch": 0.0855, "grad_norm": 0.8374993962866415, "learning_rate": 0.003, "loss": 4.1259, "step": 8550 }, { "epoch": 0.08551, "grad_norm": 0.72433894811856, "learning_rate": 0.003, "loss": 4.1202, "step": 8551 }, { "epoch": 0.08552, "grad_norm": 0.7603328491561405, "learning_rate": 0.003, "loss": 4.1029, "step": 8552 }, { "epoch": 0.08553, "grad_norm": 0.8165853461739714, "learning_rate": 0.003, "loss": 4.1123, "step": 8553 }, { "epoch": 0.08554, "grad_norm": 0.8329371861835474, "learning_rate": 0.003, "loss": 4.0715, "step": 8554 }, { "epoch": 0.08555, "grad_norm": 0.8836247642143207, "learning_rate": 0.003, "loss": 4.1223, "step": 8555 }, { "epoch": 0.08556, "grad_norm": 1.0655224233717422, "learning_rate": 0.003, "loss": 4.1118, "step": 8556 }, { "epoch": 0.08557, "grad_norm": 1.1033400346275346, "learning_rate": 0.003, "loss": 4.0809, "step": 8557 }, { "epoch": 0.08558, "grad_norm": 0.9185812974874371, "learning_rate": 0.003, "loss": 4.1023, "step": 8558 }, { "epoch": 0.08559, "grad_norm": 0.8449525867089718, "learning_rate": 0.003, "loss": 4.108, "step": 8559 }, { "epoch": 0.0856, "grad_norm": 0.8791279440183213, "learning_rate": 0.003, "loss": 4.0994, "step": 8560 }, { "epoch": 0.08561, "grad_norm": 1.007009657834021, "learning_rate": 0.003, "loss": 4.0971, "step": 8561 }, { "epoch": 0.08562, "grad_norm": 1.2506411131458524, "learning_rate": 0.003, "loss": 4.0774, "step": 8562 }, { "epoch": 0.08563, "grad_norm": 0.8595663427695304, "learning_rate": 0.003, "loss": 4.1006, "step": 8563 }, { "epoch": 0.08564, "grad_norm": 0.8258432628148261, "learning_rate": 0.003, "loss": 4.0966, "step": 8564 }, { "epoch": 0.08565, "grad_norm": 0.8232028179529909, "learning_rate": 0.003, "loss": 4.0845, "step": 8565 }, { "epoch": 0.08566, "grad_norm": 0.7588530988097654, "learning_rate": 0.003, "loss": 4.0996, "step": 8566 }, { "epoch": 0.08567, "grad_norm": 0.6824147570028768, "learning_rate": 0.003, "loss": 4.1034, "step": 8567 }, { "epoch": 0.08568, "grad_norm": 0.5996215117346594, "learning_rate": 0.003, "loss": 4.1149, "step": 8568 }, { "epoch": 0.08569, "grad_norm": 0.5807584231599277, "learning_rate": 0.003, "loss": 4.118, "step": 8569 }, { "epoch": 0.0857, "grad_norm": 0.5266785999312754, "learning_rate": 0.003, "loss": 4.1146, "step": 8570 }, { "epoch": 0.08571, "grad_norm": 0.4724117155463532, "learning_rate": 0.003, "loss": 4.0977, "step": 8571 }, { "epoch": 0.08572, "grad_norm": 0.49806214531928805, "learning_rate": 0.003, "loss": 4.0727, "step": 8572 }, { "epoch": 0.08573, "grad_norm": 0.5210491883768703, "learning_rate": 0.003, "loss": 4.0796, "step": 8573 }, { "epoch": 0.08574, "grad_norm": 0.580899421582982, "learning_rate": 0.003, "loss": 4.0694, "step": 8574 }, { "epoch": 0.08575, "grad_norm": 0.759338722165245, "learning_rate": 0.003, "loss": 4.0987, "step": 8575 }, { "epoch": 0.08576, "grad_norm": 1.0882621390033222, "learning_rate": 0.003, "loss": 4.0931, "step": 8576 }, { "epoch": 0.08577, "grad_norm": 1.286177814911776, "learning_rate": 0.003, "loss": 4.0995, "step": 8577 }, { "epoch": 0.08578, "grad_norm": 0.649434769796102, "learning_rate": 0.003, "loss": 4.0595, "step": 8578 }, { "epoch": 0.08579, "grad_norm": 0.7955247063051825, "learning_rate": 0.003, "loss": 4.0919, "step": 8579 }, { "epoch": 0.0858, "grad_norm": 0.9332228897387346, "learning_rate": 0.003, "loss": 4.0859, "step": 8580 }, { "epoch": 0.08581, "grad_norm": 0.9286534200000857, "learning_rate": 0.003, "loss": 4.0813, "step": 8581 }, { "epoch": 0.08582, "grad_norm": 0.8515796422164925, "learning_rate": 0.003, "loss": 4.1029, "step": 8582 }, { "epoch": 0.08583, "grad_norm": 0.9496730587144366, "learning_rate": 0.003, "loss": 4.1166, "step": 8583 }, { "epoch": 0.08584, "grad_norm": 1.0376797351944236, "learning_rate": 0.003, "loss": 4.1119, "step": 8584 }, { "epoch": 0.08585, "grad_norm": 1.0733355797768753, "learning_rate": 0.003, "loss": 4.1171, "step": 8585 }, { "epoch": 0.08586, "grad_norm": 1.0898909465211588, "learning_rate": 0.003, "loss": 4.1357, "step": 8586 }, { "epoch": 0.08587, "grad_norm": 0.8541629848679168, "learning_rate": 0.003, "loss": 4.1036, "step": 8587 }, { "epoch": 0.08588, "grad_norm": 0.9259074296023726, "learning_rate": 0.003, "loss": 4.0892, "step": 8588 }, { "epoch": 0.08589, "grad_norm": 1.0133166847780468, "learning_rate": 0.003, "loss": 4.1106, "step": 8589 }, { "epoch": 0.0859, "grad_norm": 0.9952675282959716, "learning_rate": 0.003, "loss": 4.0997, "step": 8590 }, { "epoch": 0.08591, "grad_norm": 0.8432804668392331, "learning_rate": 0.003, "loss": 4.1007, "step": 8591 }, { "epoch": 0.08592, "grad_norm": 0.7927775995190869, "learning_rate": 0.003, "loss": 4.1234, "step": 8592 }, { "epoch": 0.08593, "grad_norm": 0.9135931190636065, "learning_rate": 0.003, "loss": 4.1467, "step": 8593 }, { "epoch": 0.08594, "grad_norm": 0.89183466708387, "learning_rate": 0.003, "loss": 4.1019, "step": 8594 }, { "epoch": 0.08595, "grad_norm": 1.0748167455019701, "learning_rate": 0.003, "loss": 4.1136, "step": 8595 }, { "epoch": 0.08596, "grad_norm": 0.9757553423551145, "learning_rate": 0.003, "loss": 4.1293, "step": 8596 }, { "epoch": 0.08597, "grad_norm": 1.1029927439937899, "learning_rate": 0.003, "loss": 4.1324, "step": 8597 }, { "epoch": 0.08598, "grad_norm": 1.2202072285107675, "learning_rate": 0.003, "loss": 4.1056, "step": 8598 }, { "epoch": 0.08599, "grad_norm": 1.0472074968226261, "learning_rate": 0.003, "loss": 4.1223, "step": 8599 }, { "epoch": 0.086, "grad_norm": 0.9436651521834845, "learning_rate": 0.003, "loss": 4.1399, "step": 8600 }, { "epoch": 0.08601, "grad_norm": 0.8978086885499934, "learning_rate": 0.003, "loss": 4.122, "step": 8601 }, { "epoch": 0.08602, "grad_norm": 0.9046422898166396, "learning_rate": 0.003, "loss": 4.1342, "step": 8602 }, { "epoch": 0.08603, "grad_norm": 0.9940609256714368, "learning_rate": 0.003, "loss": 4.1248, "step": 8603 }, { "epoch": 0.08604, "grad_norm": 1.2914507269620887, "learning_rate": 0.003, "loss": 4.1102, "step": 8604 }, { "epoch": 0.08605, "grad_norm": 1.0010719188912025, "learning_rate": 0.003, "loss": 4.1236, "step": 8605 }, { "epoch": 0.08606, "grad_norm": 0.851064370373218, "learning_rate": 0.003, "loss": 4.0857, "step": 8606 }, { "epoch": 0.08607, "grad_norm": 0.8084048446777777, "learning_rate": 0.003, "loss": 4.129, "step": 8607 }, { "epoch": 0.08608, "grad_norm": 0.8260223223516866, "learning_rate": 0.003, "loss": 4.1001, "step": 8608 }, { "epoch": 0.08609, "grad_norm": 0.800858297527019, "learning_rate": 0.003, "loss": 4.1214, "step": 8609 }, { "epoch": 0.0861, "grad_norm": 0.7751744199950412, "learning_rate": 0.003, "loss": 4.1195, "step": 8610 }, { "epoch": 0.08611, "grad_norm": 0.7850692774747576, "learning_rate": 0.003, "loss": 4.1007, "step": 8611 }, { "epoch": 0.08612, "grad_norm": 0.8034287087644159, "learning_rate": 0.003, "loss": 4.1349, "step": 8612 }, { "epoch": 0.08613, "grad_norm": 0.7907981566162866, "learning_rate": 0.003, "loss": 4.1094, "step": 8613 }, { "epoch": 0.08614, "grad_norm": 0.7648908788973424, "learning_rate": 0.003, "loss": 4.0918, "step": 8614 }, { "epoch": 0.08615, "grad_norm": 0.8058658371668642, "learning_rate": 0.003, "loss": 4.0927, "step": 8615 }, { "epoch": 0.08616, "grad_norm": 1.0179169747185717, "learning_rate": 0.003, "loss": 4.075, "step": 8616 }, { "epoch": 0.08617, "grad_norm": 1.3401947590337657, "learning_rate": 0.003, "loss": 4.0957, "step": 8617 }, { "epoch": 0.08618, "grad_norm": 0.6850314006469229, "learning_rate": 0.003, "loss": 4.0983, "step": 8618 }, { "epoch": 0.08619, "grad_norm": 0.8070821056107416, "learning_rate": 0.003, "loss": 4.1173, "step": 8619 }, { "epoch": 0.0862, "grad_norm": 1.0123939132301667, "learning_rate": 0.003, "loss": 4.107, "step": 8620 }, { "epoch": 0.08621, "grad_norm": 1.1347343173557283, "learning_rate": 0.003, "loss": 4.0967, "step": 8621 }, { "epoch": 0.08622, "grad_norm": 0.9266592246218266, "learning_rate": 0.003, "loss": 4.12, "step": 8622 }, { "epoch": 0.08623, "grad_norm": 0.9044983849980115, "learning_rate": 0.003, "loss": 4.1097, "step": 8623 }, { "epoch": 0.08624, "grad_norm": 0.8761897564386483, "learning_rate": 0.003, "loss": 4.0926, "step": 8624 }, { "epoch": 0.08625, "grad_norm": 0.8620326850964458, "learning_rate": 0.003, "loss": 4.0828, "step": 8625 }, { "epoch": 0.08626, "grad_norm": 0.7720603843449916, "learning_rate": 0.003, "loss": 4.1196, "step": 8626 }, { "epoch": 0.08627, "grad_norm": 0.8313063670205044, "learning_rate": 0.003, "loss": 4.0995, "step": 8627 }, { "epoch": 0.08628, "grad_norm": 0.7788816276172811, "learning_rate": 0.003, "loss": 4.1021, "step": 8628 }, { "epoch": 0.08629, "grad_norm": 0.8225879339157376, "learning_rate": 0.003, "loss": 4.097, "step": 8629 }, { "epoch": 0.0863, "grad_norm": 0.9503798474325023, "learning_rate": 0.003, "loss": 4.0917, "step": 8630 }, { "epoch": 0.08631, "grad_norm": 1.0311613535749917, "learning_rate": 0.003, "loss": 4.1118, "step": 8631 }, { "epoch": 0.08632, "grad_norm": 1.097072302177679, "learning_rate": 0.003, "loss": 4.0961, "step": 8632 }, { "epoch": 0.08633, "grad_norm": 0.905010614567883, "learning_rate": 0.003, "loss": 4.1291, "step": 8633 }, { "epoch": 0.08634, "grad_norm": 0.9280254071681021, "learning_rate": 0.003, "loss": 4.1147, "step": 8634 }, { "epoch": 0.08635, "grad_norm": 0.9486845314400906, "learning_rate": 0.003, "loss": 4.1067, "step": 8635 }, { "epoch": 0.08636, "grad_norm": 0.965675528971389, "learning_rate": 0.003, "loss": 4.1059, "step": 8636 }, { "epoch": 0.08637, "grad_norm": 0.9874360924938551, "learning_rate": 0.003, "loss": 4.1314, "step": 8637 }, { "epoch": 0.08638, "grad_norm": 0.9015114006705611, "learning_rate": 0.003, "loss": 4.12, "step": 8638 }, { "epoch": 0.08639, "grad_norm": 0.8776646457013992, "learning_rate": 0.003, "loss": 4.0976, "step": 8639 }, { "epoch": 0.0864, "grad_norm": 1.1686490796312563, "learning_rate": 0.003, "loss": 4.1087, "step": 8640 }, { "epoch": 0.08641, "grad_norm": 1.1835266668134277, "learning_rate": 0.003, "loss": 4.1163, "step": 8641 }, { "epoch": 0.08642, "grad_norm": 0.7789856451482106, "learning_rate": 0.003, "loss": 4.1074, "step": 8642 }, { "epoch": 0.08643, "grad_norm": 0.7159387374682472, "learning_rate": 0.003, "loss": 4.0962, "step": 8643 }, { "epoch": 0.08644, "grad_norm": 0.7127204757723076, "learning_rate": 0.003, "loss": 4.0751, "step": 8644 }, { "epoch": 0.08645, "grad_norm": 0.8068921966237395, "learning_rate": 0.003, "loss": 4.0903, "step": 8645 }, { "epoch": 0.08646, "grad_norm": 0.9115412811447862, "learning_rate": 0.003, "loss": 4.0786, "step": 8646 }, { "epoch": 0.08647, "grad_norm": 0.9852125911616223, "learning_rate": 0.003, "loss": 4.0653, "step": 8647 }, { "epoch": 0.08648, "grad_norm": 0.9203439827975389, "learning_rate": 0.003, "loss": 4.1004, "step": 8648 }, { "epoch": 0.08649, "grad_norm": 0.8551894521735353, "learning_rate": 0.003, "loss": 4.123, "step": 8649 }, { "epoch": 0.0865, "grad_norm": 0.752190786342537, "learning_rate": 0.003, "loss": 4.0779, "step": 8650 }, { "epoch": 0.08651, "grad_norm": 0.736051245410185, "learning_rate": 0.003, "loss": 4.0992, "step": 8651 }, { "epoch": 0.08652, "grad_norm": 0.6463683221296629, "learning_rate": 0.003, "loss": 4.0704, "step": 8652 }, { "epoch": 0.08653, "grad_norm": 0.6704399865370819, "learning_rate": 0.003, "loss": 4.0775, "step": 8653 }, { "epoch": 0.08654, "grad_norm": 0.7075584672548234, "learning_rate": 0.003, "loss": 4.0918, "step": 8654 }, { "epoch": 0.08655, "grad_norm": 0.7589064409569638, "learning_rate": 0.003, "loss": 4.0739, "step": 8655 }, { "epoch": 0.08656, "grad_norm": 0.8797294460543287, "learning_rate": 0.003, "loss": 4.1078, "step": 8656 }, { "epoch": 0.08657, "grad_norm": 1.0746650206858512, "learning_rate": 0.003, "loss": 4.1251, "step": 8657 }, { "epoch": 0.08658, "grad_norm": 1.0381315037441246, "learning_rate": 0.003, "loss": 4.0852, "step": 8658 }, { "epoch": 0.08659, "grad_norm": 0.9216590731223901, "learning_rate": 0.003, "loss": 4.0936, "step": 8659 }, { "epoch": 0.0866, "grad_norm": 0.9060927251078454, "learning_rate": 0.003, "loss": 4.0894, "step": 8660 }, { "epoch": 0.08661, "grad_norm": 0.8365841556281326, "learning_rate": 0.003, "loss": 4.0914, "step": 8661 }, { "epoch": 0.08662, "grad_norm": 0.940796854284168, "learning_rate": 0.003, "loss": 4.0757, "step": 8662 }, { "epoch": 0.08663, "grad_norm": 1.1158443930232451, "learning_rate": 0.003, "loss": 4.1147, "step": 8663 }, { "epoch": 0.08664, "grad_norm": 1.0186570224186573, "learning_rate": 0.003, "loss": 4.0994, "step": 8664 }, { "epoch": 0.08665, "grad_norm": 1.0348438398835733, "learning_rate": 0.003, "loss": 4.1048, "step": 8665 }, { "epoch": 0.08666, "grad_norm": 0.9620834884254315, "learning_rate": 0.003, "loss": 4.0989, "step": 8666 }, { "epoch": 0.08667, "grad_norm": 1.006500389771506, "learning_rate": 0.003, "loss": 4.0898, "step": 8667 }, { "epoch": 0.08668, "grad_norm": 0.9850331378360856, "learning_rate": 0.003, "loss": 4.0902, "step": 8668 }, { "epoch": 0.08669, "grad_norm": 0.6894471054181678, "learning_rate": 0.003, "loss": 4.084, "step": 8669 }, { "epoch": 0.0867, "grad_norm": 0.6694894610529343, "learning_rate": 0.003, "loss": 4.0854, "step": 8670 }, { "epoch": 0.08671, "grad_norm": 0.5962672000226233, "learning_rate": 0.003, "loss": 4.0969, "step": 8671 }, { "epoch": 0.08672, "grad_norm": 0.6763083203036772, "learning_rate": 0.003, "loss": 4.0816, "step": 8672 }, { "epoch": 0.08673, "grad_norm": 0.7676489015109336, "learning_rate": 0.003, "loss": 4.0941, "step": 8673 }, { "epoch": 0.08674, "grad_norm": 0.7787151403749619, "learning_rate": 0.003, "loss": 4.1001, "step": 8674 }, { "epoch": 0.08675, "grad_norm": 0.7891229989108475, "learning_rate": 0.003, "loss": 4.1025, "step": 8675 }, { "epoch": 0.08676, "grad_norm": 0.8927860274538245, "learning_rate": 0.003, "loss": 4.0755, "step": 8676 }, { "epoch": 0.08677, "grad_norm": 1.067464395193778, "learning_rate": 0.003, "loss": 4.0821, "step": 8677 }, { "epoch": 0.08678, "grad_norm": 0.8827877751142491, "learning_rate": 0.003, "loss": 4.0855, "step": 8678 }, { "epoch": 0.08679, "grad_norm": 0.7244826450369652, "learning_rate": 0.003, "loss": 4.0942, "step": 8679 }, { "epoch": 0.0868, "grad_norm": 0.633006551065808, "learning_rate": 0.003, "loss": 4.0546, "step": 8680 }, { "epoch": 0.08681, "grad_norm": 0.7719505462142406, "learning_rate": 0.003, "loss": 4.0769, "step": 8681 }, { "epoch": 0.08682, "grad_norm": 1.0884331781139083, "learning_rate": 0.003, "loss": 4.0983, "step": 8682 }, { "epoch": 0.08683, "grad_norm": 1.1714352580907677, "learning_rate": 0.003, "loss": 4.1194, "step": 8683 }, { "epoch": 0.08684, "grad_norm": 1.0196218025826278, "learning_rate": 0.003, "loss": 4.1086, "step": 8684 }, { "epoch": 0.08685, "grad_norm": 1.0458752270945595, "learning_rate": 0.003, "loss": 4.1085, "step": 8685 }, { "epoch": 0.08686, "grad_norm": 0.9641978455934338, "learning_rate": 0.003, "loss": 4.109, "step": 8686 }, { "epoch": 0.08687, "grad_norm": 0.9135236516973639, "learning_rate": 0.003, "loss": 4.1102, "step": 8687 }, { "epoch": 0.08688, "grad_norm": 0.8512259905499174, "learning_rate": 0.003, "loss": 4.0977, "step": 8688 }, { "epoch": 0.08689, "grad_norm": 0.8190255834805528, "learning_rate": 0.003, "loss": 4.0589, "step": 8689 }, { "epoch": 0.0869, "grad_norm": 0.8398166759029391, "learning_rate": 0.003, "loss": 4.1031, "step": 8690 }, { "epoch": 0.08691, "grad_norm": 0.8574277247566975, "learning_rate": 0.003, "loss": 4.0776, "step": 8691 }, { "epoch": 0.08692, "grad_norm": 0.9518906449211479, "learning_rate": 0.003, "loss": 4.1055, "step": 8692 }, { "epoch": 0.08693, "grad_norm": 0.9069434842808899, "learning_rate": 0.003, "loss": 4.0767, "step": 8693 }, { "epoch": 0.08694, "grad_norm": 0.7908401280606225, "learning_rate": 0.003, "loss": 4.0911, "step": 8694 }, { "epoch": 0.08695, "grad_norm": 0.875710856836608, "learning_rate": 0.003, "loss": 4.0952, "step": 8695 }, { "epoch": 0.08696, "grad_norm": 1.0582108487000337, "learning_rate": 0.003, "loss": 4.1254, "step": 8696 }, { "epoch": 0.08697, "grad_norm": 1.1048858177228473, "learning_rate": 0.003, "loss": 4.118, "step": 8697 }, { "epoch": 0.08698, "grad_norm": 1.0885309061884358, "learning_rate": 0.003, "loss": 4.0962, "step": 8698 }, { "epoch": 0.08699, "grad_norm": 1.0780097630336325, "learning_rate": 0.003, "loss": 4.1093, "step": 8699 }, { "epoch": 0.087, "grad_norm": 0.8462640972072897, "learning_rate": 0.003, "loss": 4.1286, "step": 8700 }, { "epoch": 0.08701, "grad_norm": 0.7500583295478122, "learning_rate": 0.003, "loss": 4.1077, "step": 8701 }, { "epoch": 0.08702, "grad_norm": 0.8192338624119824, "learning_rate": 0.003, "loss": 4.0979, "step": 8702 }, { "epoch": 0.08703, "grad_norm": 0.9761783341836241, "learning_rate": 0.003, "loss": 4.0894, "step": 8703 }, { "epoch": 0.08704, "grad_norm": 1.0637726019049591, "learning_rate": 0.003, "loss": 4.0707, "step": 8704 }, { "epoch": 0.08705, "grad_norm": 1.039691640283121, "learning_rate": 0.003, "loss": 4.1296, "step": 8705 }, { "epoch": 0.08706, "grad_norm": 1.0345969448915813, "learning_rate": 0.003, "loss": 4.0988, "step": 8706 }, { "epoch": 0.08707, "grad_norm": 1.073442317904122, "learning_rate": 0.003, "loss": 4.1015, "step": 8707 }, { "epoch": 0.08708, "grad_norm": 1.0167968758055734, "learning_rate": 0.003, "loss": 4.1084, "step": 8708 }, { "epoch": 0.08709, "grad_norm": 1.1281785403043407, "learning_rate": 0.003, "loss": 4.1207, "step": 8709 }, { "epoch": 0.0871, "grad_norm": 0.856237067855571, "learning_rate": 0.003, "loss": 4.0919, "step": 8710 }, { "epoch": 0.08711, "grad_norm": 0.7245271989076703, "learning_rate": 0.003, "loss": 4.104, "step": 8711 }, { "epoch": 0.08712, "grad_norm": 0.6348264826820577, "learning_rate": 0.003, "loss": 4.0813, "step": 8712 }, { "epoch": 0.08713, "grad_norm": 0.7213305248825544, "learning_rate": 0.003, "loss": 4.1101, "step": 8713 }, { "epoch": 0.08714, "grad_norm": 0.7267634259694989, "learning_rate": 0.003, "loss": 4.0963, "step": 8714 }, { "epoch": 0.08715, "grad_norm": 0.7226602431965007, "learning_rate": 0.003, "loss": 4.1079, "step": 8715 }, { "epoch": 0.08716, "grad_norm": 0.749512188800938, "learning_rate": 0.003, "loss": 4.1092, "step": 8716 }, { "epoch": 0.08717, "grad_norm": 0.7945844637136416, "learning_rate": 0.003, "loss": 4.1148, "step": 8717 }, { "epoch": 0.08718, "grad_norm": 0.9455334040170957, "learning_rate": 0.003, "loss": 4.107, "step": 8718 }, { "epoch": 0.08719, "grad_norm": 1.2301600366994065, "learning_rate": 0.003, "loss": 4.1003, "step": 8719 }, { "epoch": 0.0872, "grad_norm": 0.9688272507046074, "learning_rate": 0.003, "loss": 4.0866, "step": 8720 }, { "epoch": 0.08721, "grad_norm": 0.882130678243584, "learning_rate": 0.003, "loss": 4.1033, "step": 8721 }, { "epoch": 0.08722, "grad_norm": 0.7624823510784595, "learning_rate": 0.003, "loss": 4.0923, "step": 8722 }, { "epoch": 0.08723, "grad_norm": 0.8208175149976452, "learning_rate": 0.003, "loss": 4.0866, "step": 8723 }, { "epoch": 0.08724, "grad_norm": 1.0208221289653825, "learning_rate": 0.003, "loss": 4.0965, "step": 8724 }, { "epoch": 0.08725, "grad_norm": 1.1583066056086238, "learning_rate": 0.003, "loss": 4.1105, "step": 8725 }, { "epoch": 0.08726, "grad_norm": 0.8636528049466422, "learning_rate": 0.003, "loss": 4.0862, "step": 8726 }, { "epoch": 0.08727, "grad_norm": 0.7315027868053344, "learning_rate": 0.003, "loss": 4.0785, "step": 8727 }, { "epoch": 0.08728, "grad_norm": 0.7105524649657913, "learning_rate": 0.003, "loss": 4.0625, "step": 8728 }, { "epoch": 0.08729, "grad_norm": 0.8027256735287427, "learning_rate": 0.003, "loss": 4.0617, "step": 8729 }, { "epoch": 0.0873, "grad_norm": 0.7642689263014413, "learning_rate": 0.003, "loss": 4.1013, "step": 8730 }, { "epoch": 0.08731, "grad_norm": 0.8054605331616608, "learning_rate": 0.003, "loss": 4.1007, "step": 8731 }, { "epoch": 0.08732, "grad_norm": 0.8920977986550327, "learning_rate": 0.003, "loss": 4.0846, "step": 8732 }, { "epoch": 0.08733, "grad_norm": 0.8107240409545715, "learning_rate": 0.003, "loss": 4.082, "step": 8733 }, { "epoch": 0.08734, "grad_norm": 0.8548040032308308, "learning_rate": 0.003, "loss": 4.1162, "step": 8734 }, { "epoch": 0.08735, "grad_norm": 1.2015525237447153, "learning_rate": 0.003, "loss": 4.1058, "step": 8735 }, { "epoch": 0.08736, "grad_norm": 1.0254473175716212, "learning_rate": 0.003, "loss": 4.1049, "step": 8736 }, { "epoch": 0.08737, "grad_norm": 0.9567188089886024, "learning_rate": 0.003, "loss": 4.1247, "step": 8737 }, { "epoch": 0.08738, "grad_norm": 1.0271105193607937, "learning_rate": 0.003, "loss": 4.0809, "step": 8738 }, { "epoch": 0.08739, "grad_norm": 0.9282295321228008, "learning_rate": 0.003, "loss": 4.0979, "step": 8739 }, { "epoch": 0.0874, "grad_norm": 0.8558073220103064, "learning_rate": 0.003, "loss": 4.0785, "step": 8740 }, { "epoch": 0.08741, "grad_norm": 0.8429315708538133, "learning_rate": 0.003, "loss": 4.0788, "step": 8741 }, { "epoch": 0.08742, "grad_norm": 0.8121185242286065, "learning_rate": 0.003, "loss": 4.0834, "step": 8742 }, { "epoch": 0.08743, "grad_norm": 0.9197196989946418, "learning_rate": 0.003, "loss": 4.081, "step": 8743 }, { "epoch": 0.08744, "grad_norm": 1.1348863902424295, "learning_rate": 0.003, "loss": 4.1278, "step": 8744 }, { "epoch": 0.08745, "grad_norm": 0.9755220575900516, "learning_rate": 0.003, "loss": 4.1005, "step": 8745 }, { "epoch": 0.08746, "grad_norm": 1.1617288318988086, "learning_rate": 0.003, "loss": 4.1137, "step": 8746 }, { "epoch": 0.08747, "grad_norm": 0.8515070893905221, "learning_rate": 0.003, "loss": 4.1078, "step": 8747 }, { "epoch": 0.08748, "grad_norm": 0.8210706822748797, "learning_rate": 0.003, "loss": 4.0965, "step": 8748 }, { "epoch": 0.08749, "grad_norm": 0.8533026928432937, "learning_rate": 0.003, "loss": 4.0963, "step": 8749 }, { "epoch": 0.0875, "grad_norm": 0.7247474091640602, "learning_rate": 0.003, "loss": 4.0967, "step": 8750 }, { "epoch": 0.08751, "grad_norm": 0.7613678855591323, "learning_rate": 0.003, "loss": 4.1013, "step": 8751 }, { "epoch": 0.08752, "grad_norm": 0.8298586709080547, "learning_rate": 0.003, "loss": 4.1247, "step": 8752 }, { "epoch": 0.08753, "grad_norm": 0.9510227968156119, "learning_rate": 0.003, "loss": 4.1277, "step": 8753 }, { "epoch": 0.08754, "grad_norm": 1.1661555141798292, "learning_rate": 0.003, "loss": 4.1142, "step": 8754 }, { "epoch": 0.08755, "grad_norm": 0.9742950157697924, "learning_rate": 0.003, "loss": 4.093, "step": 8755 }, { "epoch": 0.08756, "grad_norm": 1.0948999240878925, "learning_rate": 0.003, "loss": 4.0949, "step": 8756 }, { "epoch": 0.08757, "grad_norm": 1.0747066998581816, "learning_rate": 0.003, "loss": 4.1255, "step": 8757 }, { "epoch": 0.08758, "grad_norm": 0.9803826009713699, "learning_rate": 0.003, "loss": 4.0867, "step": 8758 }, { "epoch": 0.08759, "grad_norm": 0.8978411068790542, "learning_rate": 0.003, "loss": 4.1019, "step": 8759 }, { "epoch": 0.0876, "grad_norm": 0.8777548601783808, "learning_rate": 0.003, "loss": 4.1014, "step": 8760 }, { "epoch": 0.08761, "grad_norm": 0.8296530204156158, "learning_rate": 0.003, "loss": 4.1319, "step": 8761 }, { "epoch": 0.08762, "grad_norm": 0.9503211821723436, "learning_rate": 0.003, "loss": 4.0842, "step": 8762 }, { "epoch": 0.08763, "grad_norm": 0.9498831438790756, "learning_rate": 0.003, "loss": 4.0863, "step": 8763 }, { "epoch": 0.08764, "grad_norm": 0.9656491920428545, "learning_rate": 0.003, "loss": 4.0834, "step": 8764 }, { "epoch": 0.08765, "grad_norm": 1.0147165951625998, "learning_rate": 0.003, "loss": 4.0669, "step": 8765 }, { "epoch": 0.08766, "grad_norm": 0.9826351379893413, "learning_rate": 0.003, "loss": 4.1237, "step": 8766 }, { "epoch": 0.08767, "grad_norm": 1.0458878090436103, "learning_rate": 0.003, "loss": 4.1043, "step": 8767 }, { "epoch": 0.08768, "grad_norm": 1.0251752707071955, "learning_rate": 0.003, "loss": 4.1011, "step": 8768 }, { "epoch": 0.08769, "grad_norm": 0.9012853792156551, "learning_rate": 0.003, "loss": 4.0907, "step": 8769 }, { "epoch": 0.0877, "grad_norm": 0.8035752435955863, "learning_rate": 0.003, "loss": 4.1069, "step": 8770 }, { "epoch": 0.08771, "grad_norm": 0.7518314170540558, "learning_rate": 0.003, "loss": 4.0734, "step": 8771 }, { "epoch": 0.08772, "grad_norm": 0.8180447348392333, "learning_rate": 0.003, "loss": 4.0869, "step": 8772 }, { "epoch": 0.08773, "grad_norm": 0.7147251860859791, "learning_rate": 0.003, "loss": 4.1148, "step": 8773 }, { "epoch": 0.08774, "grad_norm": 0.5867039223653453, "learning_rate": 0.003, "loss": 4.0916, "step": 8774 }, { "epoch": 0.08775, "grad_norm": 0.5527229194986593, "learning_rate": 0.003, "loss": 4.0759, "step": 8775 }, { "epoch": 0.08776, "grad_norm": 0.5844947741413182, "learning_rate": 0.003, "loss": 4.0921, "step": 8776 }, { "epoch": 0.08777, "grad_norm": 0.6563370684489898, "learning_rate": 0.003, "loss": 4.1063, "step": 8777 }, { "epoch": 0.08778, "grad_norm": 0.7712524036141793, "learning_rate": 0.003, "loss": 4.0946, "step": 8778 }, { "epoch": 0.08779, "grad_norm": 0.7807517822630886, "learning_rate": 0.003, "loss": 4.1099, "step": 8779 }, { "epoch": 0.0878, "grad_norm": 0.6904453164163412, "learning_rate": 0.003, "loss": 4.0748, "step": 8780 }, { "epoch": 0.08781, "grad_norm": 0.767764779325477, "learning_rate": 0.003, "loss": 4.0872, "step": 8781 }, { "epoch": 0.08782, "grad_norm": 0.9600307430515294, "learning_rate": 0.003, "loss": 4.0921, "step": 8782 }, { "epoch": 0.08783, "grad_norm": 1.286330289921701, "learning_rate": 0.003, "loss": 4.1144, "step": 8783 }, { "epoch": 0.08784, "grad_norm": 0.9521055086549031, "learning_rate": 0.003, "loss": 4.0954, "step": 8784 }, { "epoch": 0.08785, "grad_norm": 1.3646540084787944, "learning_rate": 0.003, "loss": 4.0817, "step": 8785 }, { "epoch": 0.08786, "grad_norm": 0.9165165437571451, "learning_rate": 0.003, "loss": 4.1215, "step": 8786 }, { "epoch": 0.08787, "grad_norm": 0.8674027071829526, "learning_rate": 0.003, "loss": 4.0832, "step": 8787 }, { "epoch": 0.08788, "grad_norm": 0.8808990624383837, "learning_rate": 0.003, "loss": 4.1186, "step": 8788 }, { "epoch": 0.08789, "grad_norm": 0.9789530665994413, "learning_rate": 0.003, "loss": 4.0907, "step": 8789 }, { "epoch": 0.0879, "grad_norm": 1.05003444606044, "learning_rate": 0.003, "loss": 4.115, "step": 8790 }, { "epoch": 0.08791, "grad_norm": 0.9193191175375216, "learning_rate": 0.003, "loss": 4.0954, "step": 8791 }, { "epoch": 0.08792, "grad_norm": 0.8116887310714239, "learning_rate": 0.003, "loss": 4.1052, "step": 8792 }, { "epoch": 0.08793, "grad_norm": 0.8040951439376696, "learning_rate": 0.003, "loss": 4.0946, "step": 8793 }, { "epoch": 0.08794, "grad_norm": 0.8728936717925508, "learning_rate": 0.003, "loss": 4.0891, "step": 8794 }, { "epoch": 0.08795, "grad_norm": 0.9425258213709625, "learning_rate": 0.003, "loss": 4.0929, "step": 8795 }, { "epoch": 0.08796, "grad_norm": 0.9269620549507115, "learning_rate": 0.003, "loss": 4.0978, "step": 8796 }, { "epoch": 0.08797, "grad_norm": 0.9274274055304775, "learning_rate": 0.003, "loss": 4.0767, "step": 8797 }, { "epoch": 0.08798, "grad_norm": 1.0495786479990563, "learning_rate": 0.003, "loss": 4.1215, "step": 8798 }, { "epoch": 0.08799, "grad_norm": 1.0941402572479257, "learning_rate": 0.003, "loss": 4.0765, "step": 8799 }, { "epoch": 0.088, "grad_norm": 1.0409088434122218, "learning_rate": 0.003, "loss": 4.1182, "step": 8800 }, { "epoch": 0.08801, "grad_norm": 1.0008095022296148, "learning_rate": 0.003, "loss": 4.0656, "step": 8801 }, { "epoch": 0.08802, "grad_norm": 0.9260235726665792, "learning_rate": 0.003, "loss": 4.1157, "step": 8802 }, { "epoch": 0.08803, "grad_norm": 0.959188194655516, "learning_rate": 0.003, "loss": 4.0816, "step": 8803 }, { "epoch": 0.08804, "grad_norm": 1.0435507338866736, "learning_rate": 0.003, "loss": 4.0911, "step": 8804 }, { "epoch": 0.08805, "grad_norm": 0.9112940402613895, "learning_rate": 0.003, "loss": 4.0793, "step": 8805 }, { "epoch": 0.08806, "grad_norm": 0.9708013111525423, "learning_rate": 0.003, "loss": 4.0885, "step": 8806 }, { "epoch": 0.08807, "grad_norm": 1.0648214390050283, "learning_rate": 0.003, "loss": 4.114, "step": 8807 }, { "epoch": 0.08808, "grad_norm": 0.996034423121219, "learning_rate": 0.003, "loss": 4.0943, "step": 8808 }, { "epoch": 0.08809, "grad_norm": 1.049344011106456, "learning_rate": 0.003, "loss": 4.0977, "step": 8809 }, { "epoch": 0.0881, "grad_norm": 0.8742010348146958, "learning_rate": 0.003, "loss": 4.1013, "step": 8810 }, { "epoch": 0.08811, "grad_norm": 0.8862670461095695, "learning_rate": 0.003, "loss": 4.0965, "step": 8811 }, { "epoch": 0.08812, "grad_norm": 0.8360246704981421, "learning_rate": 0.003, "loss": 4.1038, "step": 8812 }, { "epoch": 0.08813, "grad_norm": 0.8761385565588603, "learning_rate": 0.003, "loss": 4.073, "step": 8813 }, { "epoch": 0.08814, "grad_norm": 0.8913146587128812, "learning_rate": 0.003, "loss": 4.1124, "step": 8814 }, { "epoch": 0.08815, "grad_norm": 0.9207388295031492, "learning_rate": 0.003, "loss": 4.1102, "step": 8815 }, { "epoch": 0.08816, "grad_norm": 1.0838936761521594, "learning_rate": 0.003, "loss": 4.1003, "step": 8816 }, { "epoch": 0.08817, "grad_norm": 0.967762286433456, "learning_rate": 0.003, "loss": 4.082, "step": 8817 }, { "epoch": 0.08818, "grad_norm": 0.8962809858840016, "learning_rate": 0.003, "loss": 4.0896, "step": 8818 }, { "epoch": 0.08819, "grad_norm": 0.7701740177256127, "learning_rate": 0.003, "loss": 4.1215, "step": 8819 }, { "epoch": 0.0882, "grad_norm": 0.7258766499158136, "learning_rate": 0.003, "loss": 4.094, "step": 8820 }, { "epoch": 0.08821, "grad_norm": 0.7058532115848066, "learning_rate": 0.003, "loss": 4.1208, "step": 8821 }, { "epoch": 0.08822, "grad_norm": 0.7289621370989584, "learning_rate": 0.003, "loss": 4.0567, "step": 8822 }, { "epoch": 0.08823, "grad_norm": 0.9370302189185644, "learning_rate": 0.003, "loss": 4.0915, "step": 8823 }, { "epoch": 0.08824, "grad_norm": 1.1037702165909034, "learning_rate": 0.003, "loss": 4.0924, "step": 8824 }, { "epoch": 0.08825, "grad_norm": 0.8276194064577156, "learning_rate": 0.003, "loss": 4.0973, "step": 8825 }, { "epoch": 0.08826, "grad_norm": 0.7237687402112217, "learning_rate": 0.003, "loss": 4.0739, "step": 8826 }, { "epoch": 0.08827, "grad_norm": 0.6068281322897773, "learning_rate": 0.003, "loss": 4.0962, "step": 8827 }, { "epoch": 0.08828, "grad_norm": 0.6236195415565339, "learning_rate": 0.003, "loss": 4.098, "step": 8828 }, { "epoch": 0.08829, "grad_norm": 0.6501295996843758, "learning_rate": 0.003, "loss": 4.0957, "step": 8829 }, { "epoch": 0.0883, "grad_norm": 0.7182692035416562, "learning_rate": 0.003, "loss": 4.088, "step": 8830 }, { "epoch": 0.08831, "grad_norm": 0.825778877664003, "learning_rate": 0.003, "loss": 4.0899, "step": 8831 }, { "epoch": 0.08832, "grad_norm": 0.8409707101378571, "learning_rate": 0.003, "loss": 4.0689, "step": 8832 }, { "epoch": 0.08833, "grad_norm": 0.8688218779665827, "learning_rate": 0.003, "loss": 4.0866, "step": 8833 }, { "epoch": 0.08834, "grad_norm": 1.053985971333072, "learning_rate": 0.003, "loss": 4.0861, "step": 8834 }, { "epoch": 0.08835, "grad_norm": 1.2011317648997946, "learning_rate": 0.003, "loss": 4.1244, "step": 8835 }, { "epoch": 0.08836, "grad_norm": 0.8675347336538084, "learning_rate": 0.003, "loss": 4.1047, "step": 8836 }, { "epoch": 0.08837, "grad_norm": 0.6847724769015546, "learning_rate": 0.003, "loss": 4.0755, "step": 8837 }, { "epoch": 0.08838, "grad_norm": 0.7664193052446167, "learning_rate": 0.003, "loss": 4.0935, "step": 8838 }, { "epoch": 0.08839, "grad_norm": 0.7849476506437728, "learning_rate": 0.003, "loss": 4.0831, "step": 8839 }, { "epoch": 0.0884, "grad_norm": 0.8581420457247539, "learning_rate": 0.003, "loss": 4.0804, "step": 8840 }, { "epoch": 0.08841, "grad_norm": 1.1437255891991425, "learning_rate": 0.003, "loss": 4.1094, "step": 8841 }, { "epoch": 0.08842, "grad_norm": 1.0640839104480644, "learning_rate": 0.003, "loss": 4.0886, "step": 8842 }, { "epoch": 0.08843, "grad_norm": 0.8177173898310196, "learning_rate": 0.003, "loss": 4.0807, "step": 8843 }, { "epoch": 0.08844, "grad_norm": 0.7006811082685729, "learning_rate": 0.003, "loss": 4.0688, "step": 8844 }, { "epoch": 0.08845, "grad_norm": 0.6654586857775031, "learning_rate": 0.003, "loss": 4.0522, "step": 8845 }, { "epoch": 0.08846, "grad_norm": 0.7828442286838105, "learning_rate": 0.003, "loss": 4.1247, "step": 8846 }, { "epoch": 0.08847, "grad_norm": 1.002596697776703, "learning_rate": 0.003, "loss": 4.1223, "step": 8847 }, { "epoch": 0.08848, "grad_norm": 1.0769646848289371, "learning_rate": 0.003, "loss": 4.1192, "step": 8848 }, { "epoch": 0.08849, "grad_norm": 0.9588089835995077, "learning_rate": 0.003, "loss": 4.0895, "step": 8849 }, { "epoch": 0.0885, "grad_norm": 0.9736041532230283, "learning_rate": 0.003, "loss": 4.1066, "step": 8850 }, { "epoch": 0.08851, "grad_norm": 1.0038135324969604, "learning_rate": 0.003, "loss": 4.1078, "step": 8851 }, { "epoch": 0.08852, "grad_norm": 0.8144136857171164, "learning_rate": 0.003, "loss": 4.0928, "step": 8852 }, { "epoch": 0.08853, "grad_norm": 0.7492869712538301, "learning_rate": 0.003, "loss": 4.1103, "step": 8853 }, { "epoch": 0.08854, "grad_norm": 0.7964882824236851, "learning_rate": 0.003, "loss": 4.1266, "step": 8854 }, { "epoch": 0.08855, "grad_norm": 0.8940261132351949, "learning_rate": 0.003, "loss": 4.086, "step": 8855 }, { "epoch": 0.08856, "grad_norm": 0.954650133885222, "learning_rate": 0.003, "loss": 4.1049, "step": 8856 }, { "epoch": 0.08857, "grad_norm": 0.9861048909441799, "learning_rate": 0.003, "loss": 4.1074, "step": 8857 }, { "epoch": 0.08858, "grad_norm": 1.0148434704216471, "learning_rate": 0.003, "loss": 4.0939, "step": 8858 }, { "epoch": 0.08859, "grad_norm": 1.2410690598958254, "learning_rate": 0.003, "loss": 4.1131, "step": 8859 }, { "epoch": 0.0886, "grad_norm": 0.9416605613389235, "learning_rate": 0.003, "loss": 4.116, "step": 8860 }, { "epoch": 0.08861, "grad_norm": 0.8801276255413449, "learning_rate": 0.003, "loss": 4.0913, "step": 8861 }, { "epoch": 0.08862, "grad_norm": 0.867667458558974, "learning_rate": 0.003, "loss": 4.0892, "step": 8862 }, { "epoch": 0.08863, "grad_norm": 0.9625185944766314, "learning_rate": 0.003, "loss": 4.1077, "step": 8863 }, { "epoch": 0.08864, "grad_norm": 1.025113767580424, "learning_rate": 0.003, "loss": 4.1198, "step": 8864 }, { "epoch": 0.08865, "grad_norm": 1.0305392893083485, "learning_rate": 0.003, "loss": 4.1141, "step": 8865 }, { "epoch": 0.08866, "grad_norm": 1.0855251168096982, "learning_rate": 0.003, "loss": 4.0863, "step": 8866 }, { "epoch": 0.08867, "grad_norm": 0.9690901901735053, "learning_rate": 0.003, "loss": 4.1148, "step": 8867 }, { "epoch": 0.08868, "grad_norm": 0.9930947313572007, "learning_rate": 0.003, "loss": 4.095, "step": 8868 }, { "epoch": 0.08869, "grad_norm": 1.0329659460825737, "learning_rate": 0.003, "loss": 4.1513, "step": 8869 }, { "epoch": 0.0887, "grad_norm": 0.962624300853848, "learning_rate": 0.003, "loss": 4.0869, "step": 8870 }, { "epoch": 0.08871, "grad_norm": 0.8967371579665262, "learning_rate": 0.003, "loss": 4.0782, "step": 8871 }, { "epoch": 0.08872, "grad_norm": 0.8242663665328195, "learning_rate": 0.003, "loss": 4.1344, "step": 8872 }, { "epoch": 0.08873, "grad_norm": 0.8080330802630397, "learning_rate": 0.003, "loss": 4.087, "step": 8873 }, { "epoch": 0.08874, "grad_norm": 0.8320866327498992, "learning_rate": 0.003, "loss": 4.0975, "step": 8874 }, { "epoch": 0.08875, "grad_norm": 0.8237590696825445, "learning_rate": 0.003, "loss": 4.1174, "step": 8875 }, { "epoch": 0.08876, "grad_norm": 0.7670191226856026, "learning_rate": 0.003, "loss": 4.0727, "step": 8876 }, { "epoch": 0.08877, "grad_norm": 0.8150234000038336, "learning_rate": 0.003, "loss": 4.0817, "step": 8877 }, { "epoch": 0.08878, "grad_norm": 0.9188804850843868, "learning_rate": 0.003, "loss": 4.0976, "step": 8878 }, { "epoch": 0.08879, "grad_norm": 1.1505868854127517, "learning_rate": 0.003, "loss": 4.0911, "step": 8879 }, { "epoch": 0.0888, "grad_norm": 1.0381859644112361, "learning_rate": 0.003, "loss": 4.086, "step": 8880 }, { "epoch": 0.08881, "grad_norm": 0.8126207042081777, "learning_rate": 0.003, "loss": 4.0698, "step": 8881 }, { "epoch": 0.08882, "grad_norm": 0.6375023727555839, "learning_rate": 0.003, "loss": 4.0995, "step": 8882 }, { "epoch": 0.08883, "grad_norm": 0.8121388182245645, "learning_rate": 0.003, "loss": 4.1415, "step": 8883 }, { "epoch": 0.08884, "grad_norm": 1.0502326054062787, "learning_rate": 0.003, "loss": 4.0997, "step": 8884 }, { "epoch": 0.08885, "grad_norm": 1.2283294749042548, "learning_rate": 0.003, "loss": 4.0973, "step": 8885 }, { "epoch": 0.08886, "grad_norm": 0.749971714680993, "learning_rate": 0.003, "loss": 4.0972, "step": 8886 }, { "epoch": 0.08887, "grad_norm": 0.8691386698939016, "learning_rate": 0.003, "loss": 4.0913, "step": 8887 }, { "epoch": 0.08888, "grad_norm": 0.9459570536683495, "learning_rate": 0.003, "loss": 4.0887, "step": 8888 }, { "epoch": 0.08889, "grad_norm": 1.0034071726990192, "learning_rate": 0.003, "loss": 4.0901, "step": 8889 }, { "epoch": 0.0889, "grad_norm": 1.1036828603076412, "learning_rate": 0.003, "loss": 4.0931, "step": 8890 }, { "epoch": 0.08891, "grad_norm": 0.7547248732778705, "learning_rate": 0.003, "loss": 4.0658, "step": 8891 }, { "epoch": 0.08892, "grad_norm": 0.7125455249994503, "learning_rate": 0.003, "loss": 4.1096, "step": 8892 }, { "epoch": 0.08893, "grad_norm": 0.9110389886249487, "learning_rate": 0.003, "loss": 4.0871, "step": 8893 }, { "epoch": 0.08894, "grad_norm": 1.0085325989909744, "learning_rate": 0.003, "loss": 4.079, "step": 8894 }, { "epoch": 0.08895, "grad_norm": 0.9395630146430425, "learning_rate": 0.003, "loss": 4.0917, "step": 8895 }, { "epoch": 0.08896, "grad_norm": 0.932005587780262, "learning_rate": 0.003, "loss": 4.0727, "step": 8896 }, { "epoch": 0.08897, "grad_norm": 0.9683101599385363, "learning_rate": 0.003, "loss": 4.0844, "step": 8897 }, { "epoch": 0.08898, "grad_norm": 0.8778233714006216, "learning_rate": 0.003, "loss": 4.0975, "step": 8898 }, { "epoch": 0.08899, "grad_norm": 0.8843104049066195, "learning_rate": 0.003, "loss": 4.0961, "step": 8899 }, { "epoch": 0.089, "grad_norm": 0.8611736022905074, "learning_rate": 0.003, "loss": 4.0846, "step": 8900 }, { "epoch": 0.08901, "grad_norm": 0.7675741709312146, "learning_rate": 0.003, "loss": 4.0967, "step": 8901 }, { "epoch": 0.08902, "grad_norm": 0.7542143956414811, "learning_rate": 0.003, "loss": 4.0963, "step": 8902 }, { "epoch": 0.08903, "grad_norm": 0.824143498268071, "learning_rate": 0.003, "loss": 4.0832, "step": 8903 }, { "epoch": 0.08904, "grad_norm": 1.0036741311146173, "learning_rate": 0.003, "loss": 4.1018, "step": 8904 }, { "epoch": 0.08905, "grad_norm": 1.1665427882326276, "learning_rate": 0.003, "loss": 4.1198, "step": 8905 }, { "epoch": 0.08906, "grad_norm": 0.9167446591164601, "learning_rate": 0.003, "loss": 4.1021, "step": 8906 }, { "epoch": 0.08907, "grad_norm": 0.8151193889750405, "learning_rate": 0.003, "loss": 4.0966, "step": 8907 }, { "epoch": 0.08908, "grad_norm": 0.8148807348539009, "learning_rate": 0.003, "loss": 4.116, "step": 8908 }, { "epoch": 0.08909, "grad_norm": 0.8006264904937266, "learning_rate": 0.003, "loss": 4.1071, "step": 8909 }, { "epoch": 0.0891, "grad_norm": 0.7847183342716237, "learning_rate": 0.003, "loss": 4.0888, "step": 8910 }, { "epoch": 0.08911, "grad_norm": 0.871137922082431, "learning_rate": 0.003, "loss": 4.0945, "step": 8911 }, { "epoch": 0.08912, "grad_norm": 0.9186358754285994, "learning_rate": 0.003, "loss": 4.0941, "step": 8912 }, { "epoch": 0.08913, "grad_norm": 0.8792963429484404, "learning_rate": 0.003, "loss": 4.1138, "step": 8913 }, { "epoch": 0.08914, "grad_norm": 0.8389522069822098, "learning_rate": 0.003, "loss": 4.1074, "step": 8914 }, { "epoch": 0.08915, "grad_norm": 0.7438660035575635, "learning_rate": 0.003, "loss": 4.111, "step": 8915 }, { "epoch": 0.08916, "grad_norm": 0.81031482315623, "learning_rate": 0.003, "loss": 4.0994, "step": 8916 }, { "epoch": 0.08917, "grad_norm": 0.9879431715632052, "learning_rate": 0.003, "loss": 4.0864, "step": 8917 }, { "epoch": 0.08918, "grad_norm": 1.1263164984050484, "learning_rate": 0.003, "loss": 4.1033, "step": 8918 }, { "epoch": 0.08919, "grad_norm": 0.8217791465004585, "learning_rate": 0.003, "loss": 4.1052, "step": 8919 }, { "epoch": 0.0892, "grad_norm": 0.838741117076949, "learning_rate": 0.003, "loss": 4.0805, "step": 8920 }, { "epoch": 0.08921, "grad_norm": 0.8013123325103196, "learning_rate": 0.003, "loss": 4.0809, "step": 8921 }, { "epoch": 0.08922, "grad_norm": 0.8829008386490941, "learning_rate": 0.003, "loss": 4.1102, "step": 8922 }, { "epoch": 0.08923, "grad_norm": 0.831010513545994, "learning_rate": 0.003, "loss": 4.1134, "step": 8923 }, { "epoch": 0.08924, "grad_norm": 0.8026665557631941, "learning_rate": 0.003, "loss": 4.0877, "step": 8924 }, { "epoch": 0.08925, "grad_norm": 0.7516895419563575, "learning_rate": 0.003, "loss": 4.0955, "step": 8925 }, { "epoch": 0.08926, "grad_norm": 0.6859708086212067, "learning_rate": 0.003, "loss": 4.0767, "step": 8926 }, { "epoch": 0.08927, "grad_norm": 0.705876228465091, "learning_rate": 0.003, "loss": 4.0883, "step": 8927 }, { "epoch": 0.08928, "grad_norm": 0.8197442137549819, "learning_rate": 0.003, "loss": 4.0828, "step": 8928 }, { "epoch": 0.08929, "grad_norm": 1.158197035763089, "learning_rate": 0.003, "loss": 4.0983, "step": 8929 }, { "epoch": 0.0893, "grad_norm": 1.2835489288131519, "learning_rate": 0.003, "loss": 4.1078, "step": 8930 }, { "epoch": 0.08931, "grad_norm": 0.9255470549413722, "learning_rate": 0.003, "loss": 4.0873, "step": 8931 }, { "epoch": 0.08932, "grad_norm": 0.9494109894231941, "learning_rate": 0.003, "loss": 4.1048, "step": 8932 }, { "epoch": 0.08933, "grad_norm": 0.9325764456947372, "learning_rate": 0.003, "loss": 4.0954, "step": 8933 }, { "epoch": 0.08934, "grad_norm": 0.9120790589940708, "learning_rate": 0.003, "loss": 4.0916, "step": 8934 }, { "epoch": 0.08935, "grad_norm": 0.9622123789895787, "learning_rate": 0.003, "loss": 4.106, "step": 8935 }, { "epoch": 0.08936, "grad_norm": 0.9982089757464768, "learning_rate": 0.003, "loss": 4.0712, "step": 8936 }, { "epoch": 0.08937, "grad_norm": 0.8488183553743273, "learning_rate": 0.003, "loss": 4.0731, "step": 8937 }, { "epoch": 0.08938, "grad_norm": 0.711168048268646, "learning_rate": 0.003, "loss": 4.106, "step": 8938 }, { "epoch": 0.08939, "grad_norm": 0.7145069662449071, "learning_rate": 0.003, "loss": 4.1228, "step": 8939 }, { "epoch": 0.0894, "grad_norm": 0.9351641300422199, "learning_rate": 0.003, "loss": 4.0905, "step": 8940 }, { "epoch": 0.08941, "grad_norm": 0.9908374405712305, "learning_rate": 0.003, "loss": 4.1068, "step": 8941 }, { "epoch": 0.08942, "grad_norm": 1.0369857459287184, "learning_rate": 0.003, "loss": 4.0808, "step": 8942 }, { "epoch": 0.08943, "grad_norm": 0.9815132194398201, "learning_rate": 0.003, "loss": 4.1022, "step": 8943 }, { "epoch": 0.08944, "grad_norm": 0.8275387834639889, "learning_rate": 0.003, "loss": 4.0679, "step": 8944 }, { "epoch": 0.08945, "grad_norm": 0.7363945481987325, "learning_rate": 0.003, "loss": 4.0622, "step": 8945 }, { "epoch": 0.08946, "grad_norm": 0.7493687013662418, "learning_rate": 0.003, "loss": 4.1161, "step": 8946 }, { "epoch": 0.08947, "grad_norm": 0.8635191868013845, "learning_rate": 0.003, "loss": 4.0948, "step": 8947 }, { "epoch": 0.08948, "grad_norm": 0.8462786471943283, "learning_rate": 0.003, "loss": 4.0659, "step": 8948 }, { "epoch": 0.08949, "grad_norm": 0.8671422196356308, "learning_rate": 0.003, "loss": 4.0903, "step": 8949 }, { "epoch": 0.0895, "grad_norm": 0.9409364514795033, "learning_rate": 0.003, "loss": 4.0879, "step": 8950 }, { "epoch": 0.08951, "grad_norm": 1.090641876171059, "learning_rate": 0.003, "loss": 4.0984, "step": 8951 }, { "epoch": 0.08952, "grad_norm": 1.0606427376589673, "learning_rate": 0.003, "loss": 4.1129, "step": 8952 }, { "epoch": 0.08953, "grad_norm": 0.9918378904457558, "learning_rate": 0.003, "loss": 4.1043, "step": 8953 }, { "epoch": 0.08954, "grad_norm": 0.9774618750373327, "learning_rate": 0.003, "loss": 4.0661, "step": 8954 }, { "epoch": 0.08955, "grad_norm": 1.2355673972258312, "learning_rate": 0.003, "loss": 4.1081, "step": 8955 }, { "epoch": 0.08956, "grad_norm": 0.958337187094943, "learning_rate": 0.003, "loss": 4.1043, "step": 8956 }, { "epoch": 0.08957, "grad_norm": 1.1059278113358553, "learning_rate": 0.003, "loss": 4.1176, "step": 8957 }, { "epoch": 0.08958, "grad_norm": 1.0600369480191711, "learning_rate": 0.003, "loss": 4.0708, "step": 8958 }, { "epoch": 0.08959, "grad_norm": 0.9044461430911768, "learning_rate": 0.003, "loss": 4.0892, "step": 8959 }, { "epoch": 0.0896, "grad_norm": 0.751672034162579, "learning_rate": 0.003, "loss": 4.0631, "step": 8960 }, { "epoch": 0.08961, "grad_norm": 0.6890812720881869, "learning_rate": 0.003, "loss": 4.0814, "step": 8961 }, { "epoch": 0.08962, "grad_norm": 0.6420758804789315, "learning_rate": 0.003, "loss": 4.0848, "step": 8962 }, { "epoch": 0.08963, "grad_norm": 0.7715824227745989, "learning_rate": 0.003, "loss": 4.119, "step": 8963 }, { "epoch": 0.08964, "grad_norm": 0.9769914979893802, "learning_rate": 0.003, "loss": 4.1009, "step": 8964 }, { "epoch": 0.08965, "grad_norm": 1.1055411204729655, "learning_rate": 0.003, "loss": 4.1159, "step": 8965 }, { "epoch": 0.08966, "grad_norm": 0.8858925381363988, "learning_rate": 0.003, "loss": 4.0805, "step": 8966 }, { "epoch": 0.08967, "grad_norm": 0.973405476101832, "learning_rate": 0.003, "loss": 4.125, "step": 8967 }, { "epoch": 0.08968, "grad_norm": 1.246488899818171, "learning_rate": 0.003, "loss": 4.1068, "step": 8968 }, { "epoch": 0.08969, "grad_norm": 0.7919830012222802, "learning_rate": 0.003, "loss": 4.092, "step": 8969 }, { "epoch": 0.0897, "grad_norm": 0.783905488272791, "learning_rate": 0.003, "loss": 4.1039, "step": 8970 }, { "epoch": 0.08971, "grad_norm": 0.8587360970358755, "learning_rate": 0.003, "loss": 4.0975, "step": 8971 }, { "epoch": 0.08972, "grad_norm": 1.1812204010197107, "learning_rate": 0.003, "loss": 4.0953, "step": 8972 }, { "epoch": 0.08973, "grad_norm": 1.072340183548216, "learning_rate": 0.003, "loss": 4.1285, "step": 8973 }, { "epoch": 0.08974, "grad_norm": 0.9624889318642255, "learning_rate": 0.003, "loss": 4.1191, "step": 8974 }, { "epoch": 0.08975, "grad_norm": 1.103329859592797, "learning_rate": 0.003, "loss": 4.1042, "step": 8975 }, { "epoch": 0.08976, "grad_norm": 0.9226100631400868, "learning_rate": 0.003, "loss": 4.1147, "step": 8976 }, { "epoch": 0.08977, "grad_norm": 0.9050852208024028, "learning_rate": 0.003, "loss": 4.0989, "step": 8977 }, { "epoch": 0.08978, "grad_norm": 0.9593748971336933, "learning_rate": 0.003, "loss": 4.1056, "step": 8978 }, { "epoch": 0.08979, "grad_norm": 0.917813099791213, "learning_rate": 0.003, "loss": 4.0846, "step": 8979 }, { "epoch": 0.0898, "grad_norm": 0.850359408142017, "learning_rate": 0.003, "loss": 4.1055, "step": 8980 }, { "epoch": 0.08981, "grad_norm": 0.7873512624929737, "learning_rate": 0.003, "loss": 4.08, "step": 8981 }, { "epoch": 0.08982, "grad_norm": 0.7460904455327514, "learning_rate": 0.003, "loss": 4.0876, "step": 8982 }, { "epoch": 0.08983, "grad_norm": 0.813398130626127, "learning_rate": 0.003, "loss": 4.0698, "step": 8983 }, { "epoch": 0.08984, "grad_norm": 1.0379519181428254, "learning_rate": 0.003, "loss": 4.1201, "step": 8984 }, { "epoch": 0.08985, "grad_norm": 1.2369546672505778, "learning_rate": 0.003, "loss": 4.107, "step": 8985 }, { "epoch": 0.08986, "grad_norm": 0.7482892312093569, "learning_rate": 0.003, "loss": 4.1, "step": 8986 }, { "epoch": 0.08987, "grad_norm": 0.6543486223493022, "learning_rate": 0.003, "loss": 4.0833, "step": 8987 }, { "epoch": 0.08988, "grad_norm": 0.6451367251570368, "learning_rate": 0.003, "loss": 4.0811, "step": 8988 }, { "epoch": 0.08989, "grad_norm": 0.601214981373186, "learning_rate": 0.003, "loss": 4.1077, "step": 8989 }, { "epoch": 0.0899, "grad_norm": 0.670157839844488, "learning_rate": 0.003, "loss": 4.0765, "step": 8990 }, { "epoch": 0.08991, "grad_norm": 0.6344666888841743, "learning_rate": 0.003, "loss": 4.0835, "step": 8991 }, { "epoch": 0.08992, "grad_norm": 0.6087663409550896, "learning_rate": 0.003, "loss": 4.0701, "step": 8992 }, { "epoch": 0.08993, "grad_norm": 0.6007106864127649, "learning_rate": 0.003, "loss": 4.0781, "step": 8993 }, { "epoch": 0.08994, "grad_norm": 0.6569404032054016, "learning_rate": 0.003, "loss": 4.0801, "step": 8994 }, { "epoch": 0.08995, "grad_norm": 0.8011788589422568, "learning_rate": 0.003, "loss": 4.0907, "step": 8995 }, { "epoch": 0.08996, "grad_norm": 1.090537283208205, "learning_rate": 0.003, "loss": 4.1194, "step": 8996 }, { "epoch": 0.08997, "grad_norm": 1.259139252903192, "learning_rate": 0.003, "loss": 4.096, "step": 8997 }, { "epoch": 0.08998, "grad_norm": 0.7711113115206871, "learning_rate": 0.003, "loss": 4.0732, "step": 8998 }, { "epoch": 0.08999, "grad_norm": 0.7350565093378323, "learning_rate": 0.003, "loss": 4.0998, "step": 8999 }, { "epoch": 0.09, "grad_norm": 0.8212463674203402, "learning_rate": 0.003, "loss": 4.093, "step": 9000 }, { "epoch": 0.09001, "grad_norm": 0.9909623667562864, "learning_rate": 0.003, "loss": 4.0903, "step": 9001 }, { "epoch": 0.09002, "grad_norm": 1.1337359570952938, "learning_rate": 0.003, "loss": 4.0892, "step": 9002 }, { "epoch": 0.09003, "grad_norm": 1.066025616693887, "learning_rate": 0.003, "loss": 4.0681, "step": 9003 }, { "epoch": 0.09004, "grad_norm": 1.022429577767667, "learning_rate": 0.003, "loss": 4.0909, "step": 9004 }, { "epoch": 0.09005, "grad_norm": 0.8748962510761829, "learning_rate": 0.003, "loss": 4.1092, "step": 9005 }, { "epoch": 0.09006, "grad_norm": 0.7098491602250373, "learning_rate": 0.003, "loss": 4.0837, "step": 9006 }, { "epoch": 0.09007, "grad_norm": 0.5692107949898508, "learning_rate": 0.003, "loss": 4.1054, "step": 9007 }, { "epoch": 0.09008, "grad_norm": 0.5772606567632318, "learning_rate": 0.003, "loss": 4.0889, "step": 9008 }, { "epoch": 0.09009, "grad_norm": 0.6669070549022329, "learning_rate": 0.003, "loss": 4.0955, "step": 9009 }, { "epoch": 0.0901, "grad_norm": 0.8245290219344589, "learning_rate": 0.003, "loss": 4.0863, "step": 9010 }, { "epoch": 0.09011, "grad_norm": 0.9106422213480005, "learning_rate": 0.003, "loss": 4.0857, "step": 9011 }, { "epoch": 0.09012, "grad_norm": 0.9526910382185314, "learning_rate": 0.003, "loss": 4.0873, "step": 9012 }, { "epoch": 0.09013, "grad_norm": 0.9182834414849824, "learning_rate": 0.003, "loss": 4.081, "step": 9013 }, { "epoch": 0.09014, "grad_norm": 0.8705840436567862, "learning_rate": 0.003, "loss": 4.1167, "step": 9014 }, { "epoch": 0.09015, "grad_norm": 0.9199037787555114, "learning_rate": 0.003, "loss": 4.1304, "step": 9015 }, { "epoch": 0.09016, "grad_norm": 0.8979198281963656, "learning_rate": 0.003, "loss": 4.1133, "step": 9016 }, { "epoch": 0.09017, "grad_norm": 0.9760173547686396, "learning_rate": 0.003, "loss": 4.122, "step": 9017 }, { "epoch": 0.09018, "grad_norm": 1.0721385512227886, "learning_rate": 0.003, "loss": 4.1147, "step": 9018 }, { "epoch": 0.09019, "grad_norm": 0.9082460234466702, "learning_rate": 0.003, "loss": 4.0808, "step": 9019 }, { "epoch": 0.0902, "grad_norm": 0.9401369695786034, "learning_rate": 0.003, "loss": 4.0984, "step": 9020 }, { "epoch": 0.09021, "grad_norm": 0.9948251941982775, "learning_rate": 0.003, "loss": 4.1138, "step": 9021 }, { "epoch": 0.09022, "grad_norm": 1.1842814118486442, "learning_rate": 0.003, "loss": 4.1091, "step": 9022 }, { "epoch": 0.09023, "grad_norm": 0.7972897768988289, "learning_rate": 0.003, "loss": 4.0942, "step": 9023 }, { "epoch": 0.09024, "grad_norm": 0.7421033982019426, "learning_rate": 0.003, "loss": 4.079, "step": 9024 }, { "epoch": 0.09025, "grad_norm": 0.7738259101346501, "learning_rate": 0.003, "loss": 4.0991, "step": 9025 }, { "epoch": 0.09026, "grad_norm": 0.8970150206090364, "learning_rate": 0.003, "loss": 4.0937, "step": 9026 }, { "epoch": 0.09027, "grad_norm": 1.0492341252458535, "learning_rate": 0.003, "loss": 4.0907, "step": 9027 }, { "epoch": 0.09028, "grad_norm": 1.2454585937869171, "learning_rate": 0.003, "loss": 4.1123, "step": 9028 }, { "epoch": 0.09029, "grad_norm": 0.9252985895422703, "learning_rate": 0.003, "loss": 4.0959, "step": 9029 }, { "epoch": 0.0903, "grad_norm": 0.9241514183975191, "learning_rate": 0.003, "loss": 4.1248, "step": 9030 }, { "epoch": 0.09031, "grad_norm": 0.8763169048046952, "learning_rate": 0.003, "loss": 4.0787, "step": 9031 }, { "epoch": 0.09032, "grad_norm": 0.9386352079429895, "learning_rate": 0.003, "loss": 4.1293, "step": 9032 }, { "epoch": 0.09033, "grad_norm": 0.9783200260031812, "learning_rate": 0.003, "loss": 4.0726, "step": 9033 }, { "epoch": 0.09034, "grad_norm": 0.956003427831922, "learning_rate": 0.003, "loss": 4.0942, "step": 9034 }, { "epoch": 0.09035, "grad_norm": 0.8041920674510835, "learning_rate": 0.003, "loss": 4.0918, "step": 9035 }, { "epoch": 0.09036, "grad_norm": 0.8190103140470621, "learning_rate": 0.003, "loss": 4.0878, "step": 9036 }, { "epoch": 0.09037, "grad_norm": 0.8735918968075451, "learning_rate": 0.003, "loss": 4.0966, "step": 9037 }, { "epoch": 0.09038, "grad_norm": 0.8950344717934043, "learning_rate": 0.003, "loss": 4.0977, "step": 9038 }, { "epoch": 0.09039, "grad_norm": 0.8598737751053982, "learning_rate": 0.003, "loss": 4.1071, "step": 9039 }, { "epoch": 0.0904, "grad_norm": 0.909321180715369, "learning_rate": 0.003, "loss": 4.0825, "step": 9040 }, { "epoch": 0.09041, "grad_norm": 0.8046317272372915, "learning_rate": 0.003, "loss": 4.0756, "step": 9041 }, { "epoch": 0.09042, "grad_norm": 0.7635821438024983, "learning_rate": 0.003, "loss": 4.0779, "step": 9042 }, { "epoch": 0.09043, "grad_norm": 0.9240410411420402, "learning_rate": 0.003, "loss": 4.0861, "step": 9043 }, { "epoch": 0.09044, "grad_norm": 1.15129774565637, "learning_rate": 0.003, "loss": 4.0891, "step": 9044 }, { "epoch": 0.09045, "grad_norm": 1.0176507372571526, "learning_rate": 0.003, "loss": 4.0963, "step": 9045 }, { "epoch": 0.09046, "grad_norm": 1.0168928227228877, "learning_rate": 0.003, "loss": 4.0698, "step": 9046 }, { "epoch": 0.09047, "grad_norm": 1.0855871224553029, "learning_rate": 0.003, "loss": 4.0938, "step": 9047 }, { "epoch": 0.09048, "grad_norm": 0.9615447887028904, "learning_rate": 0.003, "loss": 4.0888, "step": 9048 }, { "epoch": 0.09049, "grad_norm": 0.9780305253510888, "learning_rate": 0.003, "loss": 4.1164, "step": 9049 }, { "epoch": 0.0905, "grad_norm": 0.919712660817147, "learning_rate": 0.003, "loss": 4.0926, "step": 9050 }, { "epoch": 0.09051, "grad_norm": 0.9279054815642093, "learning_rate": 0.003, "loss": 4.0897, "step": 9051 }, { "epoch": 0.09052, "grad_norm": 0.9418205046994602, "learning_rate": 0.003, "loss": 4.0995, "step": 9052 }, { "epoch": 0.09053, "grad_norm": 0.9469408829030873, "learning_rate": 0.003, "loss": 4.0813, "step": 9053 }, { "epoch": 0.09054, "grad_norm": 0.9051974077534375, "learning_rate": 0.003, "loss": 4.0836, "step": 9054 }, { "epoch": 0.09055, "grad_norm": 1.024878799010692, "learning_rate": 0.003, "loss": 4.0809, "step": 9055 }, { "epoch": 0.09056, "grad_norm": 0.9922488913468747, "learning_rate": 0.003, "loss": 4.0868, "step": 9056 }, { "epoch": 0.09057, "grad_norm": 0.881282710691066, "learning_rate": 0.003, "loss": 4.1119, "step": 9057 }, { "epoch": 0.09058, "grad_norm": 0.9505568921531226, "learning_rate": 0.003, "loss": 4.1212, "step": 9058 }, { "epoch": 0.09059, "grad_norm": 0.9853808181646652, "learning_rate": 0.003, "loss": 4.1086, "step": 9059 }, { "epoch": 0.0906, "grad_norm": 1.085605727215449, "learning_rate": 0.003, "loss": 4.1289, "step": 9060 }, { "epoch": 0.09061, "grad_norm": 0.9708980429037549, "learning_rate": 0.003, "loss": 4.1086, "step": 9061 }, { "epoch": 0.09062, "grad_norm": 0.9437630749155476, "learning_rate": 0.003, "loss": 4.077, "step": 9062 }, { "epoch": 0.09063, "grad_norm": 1.15349103657829, "learning_rate": 0.003, "loss": 4.0964, "step": 9063 }, { "epoch": 0.09064, "grad_norm": 0.9708247162873537, "learning_rate": 0.003, "loss": 4.1191, "step": 9064 }, { "epoch": 0.09065, "grad_norm": 0.8902413777847998, "learning_rate": 0.003, "loss": 4.1096, "step": 9065 }, { "epoch": 0.09066, "grad_norm": 0.737181277087745, "learning_rate": 0.003, "loss": 4.0956, "step": 9066 }, { "epoch": 0.09067, "grad_norm": 0.6279056212510534, "learning_rate": 0.003, "loss": 4.0873, "step": 9067 }, { "epoch": 0.09068, "grad_norm": 0.6385574242216696, "learning_rate": 0.003, "loss": 4.0991, "step": 9068 }, { "epoch": 0.09069, "grad_norm": 0.6483082983150563, "learning_rate": 0.003, "loss": 4.1367, "step": 9069 }, { "epoch": 0.0907, "grad_norm": 0.8253555420224653, "learning_rate": 0.003, "loss": 4.1165, "step": 9070 }, { "epoch": 0.09071, "grad_norm": 1.0346138256073594, "learning_rate": 0.003, "loss": 4.1003, "step": 9071 }, { "epoch": 0.09072, "grad_norm": 0.9313433051670195, "learning_rate": 0.003, "loss": 4.074, "step": 9072 }, { "epoch": 0.09073, "grad_norm": 1.0276295171442573, "learning_rate": 0.003, "loss": 4.1075, "step": 9073 }, { "epoch": 0.09074, "grad_norm": 1.1532562018862866, "learning_rate": 0.003, "loss": 4.0826, "step": 9074 }, { "epoch": 0.09075, "grad_norm": 1.0895198666143424, "learning_rate": 0.003, "loss": 4.1231, "step": 9075 }, { "epoch": 0.09076, "grad_norm": 0.9624509207513962, "learning_rate": 0.003, "loss": 4.1227, "step": 9076 }, { "epoch": 0.09077, "grad_norm": 1.0339320521373805, "learning_rate": 0.003, "loss": 4.1254, "step": 9077 }, { "epoch": 0.09078, "grad_norm": 1.0567530098664932, "learning_rate": 0.003, "loss": 4.1014, "step": 9078 }, { "epoch": 0.09079, "grad_norm": 0.9774925721478938, "learning_rate": 0.003, "loss": 4.1089, "step": 9079 }, { "epoch": 0.0908, "grad_norm": 1.0195847641854132, "learning_rate": 0.003, "loss": 4.0894, "step": 9080 }, { "epoch": 0.09081, "grad_norm": 1.0092501737148587, "learning_rate": 0.003, "loss": 4.1127, "step": 9081 }, { "epoch": 0.09082, "grad_norm": 1.2418266763070203, "learning_rate": 0.003, "loss": 4.0955, "step": 9082 }, { "epoch": 0.09083, "grad_norm": 0.7841216118125688, "learning_rate": 0.003, "loss": 4.1031, "step": 9083 }, { "epoch": 0.09084, "grad_norm": 0.6720714850940325, "learning_rate": 0.003, "loss": 4.0714, "step": 9084 }, { "epoch": 0.09085, "grad_norm": 0.8612009953168612, "learning_rate": 0.003, "loss": 4.1132, "step": 9085 }, { "epoch": 0.09086, "grad_norm": 1.0531662786767666, "learning_rate": 0.003, "loss": 4.087, "step": 9086 }, { "epoch": 0.09087, "grad_norm": 1.0678011915356584, "learning_rate": 0.003, "loss": 4.0907, "step": 9087 }, { "epoch": 0.09088, "grad_norm": 0.8076834182848771, "learning_rate": 0.003, "loss": 4.0829, "step": 9088 }, { "epoch": 0.09089, "grad_norm": 0.7750777971721545, "learning_rate": 0.003, "loss": 4.0956, "step": 9089 }, { "epoch": 0.0909, "grad_norm": 0.8512525308462379, "learning_rate": 0.003, "loss": 4.0864, "step": 9090 }, { "epoch": 0.09091, "grad_norm": 1.0582316561541651, "learning_rate": 0.003, "loss": 4.0728, "step": 9091 }, { "epoch": 0.09092, "grad_norm": 0.898113597368355, "learning_rate": 0.003, "loss": 4.1276, "step": 9092 }, { "epoch": 0.09093, "grad_norm": 0.8629700654684181, "learning_rate": 0.003, "loss": 4.0842, "step": 9093 }, { "epoch": 0.09094, "grad_norm": 0.8939556429591207, "learning_rate": 0.003, "loss": 4.0975, "step": 9094 }, { "epoch": 0.09095, "grad_norm": 0.8038507515784818, "learning_rate": 0.003, "loss": 4.1036, "step": 9095 }, { "epoch": 0.09096, "grad_norm": 0.7779918254246555, "learning_rate": 0.003, "loss": 4.0804, "step": 9096 }, { "epoch": 0.09097, "grad_norm": 0.7898567013017581, "learning_rate": 0.003, "loss": 4.0962, "step": 9097 }, { "epoch": 0.09098, "grad_norm": 0.7469344511651337, "learning_rate": 0.003, "loss": 4.068, "step": 9098 }, { "epoch": 0.09099, "grad_norm": 0.81782758465319, "learning_rate": 0.003, "loss": 4.0794, "step": 9099 }, { "epoch": 0.091, "grad_norm": 0.8663932475321643, "learning_rate": 0.003, "loss": 4.071, "step": 9100 }, { "epoch": 0.09101, "grad_norm": 0.9040908007482058, "learning_rate": 0.003, "loss": 4.0922, "step": 9101 }, { "epoch": 0.09102, "grad_norm": 0.8974025166455672, "learning_rate": 0.003, "loss": 4.1023, "step": 9102 }, { "epoch": 0.09103, "grad_norm": 0.8432481209256596, "learning_rate": 0.003, "loss": 4.0653, "step": 9103 }, { "epoch": 0.09104, "grad_norm": 0.7839104837670768, "learning_rate": 0.003, "loss": 4.0897, "step": 9104 }, { "epoch": 0.09105, "grad_norm": 0.74968201955386, "learning_rate": 0.003, "loss": 4.0693, "step": 9105 }, { "epoch": 0.09106, "grad_norm": 0.862192217242314, "learning_rate": 0.003, "loss": 4.0704, "step": 9106 }, { "epoch": 0.09107, "grad_norm": 0.8987410447628759, "learning_rate": 0.003, "loss": 4.0857, "step": 9107 }, { "epoch": 0.09108, "grad_norm": 0.834199691676135, "learning_rate": 0.003, "loss": 4.0778, "step": 9108 }, { "epoch": 0.09109, "grad_norm": 0.8002673934636545, "learning_rate": 0.003, "loss": 4.0754, "step": 9109 }, { "epoch": 0.0911, "grad_norm": 0.8991637907546197, "learning_rate": 0.003, "loss": 4.0872, "step": 9110 }, { "epoch": 0.09111, "grad_norm": 1.0150826030428626, "learning_rate": 0.003, "loss": 4.1061, "step": 9111 }, { "epoch": 0.09112, "grad_norm": 1.0652838006542034, "learning_rate": 0.003, "loss": 4.0773, "step": 9112 }, { "epoch": 0.09113, "grad_norm": 0.8745809726912822, "learning_rate": 0.003, "loss": 4.0628, "step": 9113 }, { "epoch": 0.09114, "grad_norm": 0.891905966917683, "learning_rate": 0.003, "loss": 4.0808, "step": 9114 }, { "epoch": 0.09115, "grad_norm": 1.0521275368839003, "learning_rate": 0.003, "loss": 4.0925, "step": 9115 }, { "epoch": 0.09116, "grad_norm": 0.8815584303045687, "learning_rate": 0.003, "loss": 4.1368, "step": 9116 }, { "epoch": 0.09117, "grad_norm": 1.0372762229109376, "learning_rate": 0.003, "loss": 4.1088, "step": 9117 }, { "epoch": 0.09118, "grad_norm": 0.9807823101415436, "learning_rate": 0.003, "loss": 4.1009, "step": 9118 }, { "epoch": 0.09119, "grad_norm": 1.0463681239596228, "learning_rate": 0.003, "loss": 4.0978, "step": 9119 }, { "epoch": 0.0912, "grad_norm": 1.1558735058794685, "learning_rate": 0.003, "loss": 4.117, "step": 9120 }, { "epoch": 0.09121, "grad_norm": 0.9471193935945554, "learning_rate": 0.003, "loss": 4.1303, "step": 9121 }, { "epoch": 0.09122, "grad_norm": 0.903406256690084, "learning_rate": 0.003, "loss": 4.1164, "step": 9122 }, { "epoch": 0.09123, "grad_norm": 0.9082666157548727, "learning_rate": 0.003, "loss": 4.0726, "step": 9123 }, { "epoch": 0.09124, "grad_norm": 1.0288774290050668, "learning_rate": 0.003, "loss": 4.0951, "step": 9124 }, { "epoch": 0.09125, "grad_norm": 1.1563589417293623, "learning_rate": 0.003, "loss": 4.1232, "step": 9125 }, { "epoch": 0.09126, "grad_norm": 0.9664619902252881, "learning_rate": 0.003, "loss": 4.0717, "step": 9126 }, { "epoch": 0.09127, "grad_norm": 0.9629651919979498, "learning_rate": 0.003, "loss": 4.1087, "step": 9127 }, { "epoch": 0.09128, "grad_norm": 1.0135807050519996, "learning_rate": 0.003, "loss": 4.103, "step": 9128 }, { "epoch": 0.09129, "grad_norm": 1.1527226810083393, "learning_rate": 0.003, "loss": 4.0973, "step": 9129 }, { "epoch": 0.0913, "grad_norm": 0.8454458533144666, "learning_rate": 0.003, "loss": 4.0879, "step": 9130 }, { "epoch": 0.09131, "grad_norm": 0.6217315912216276, "learning_rate": 0.003, "loss": 4.0836, "step": 9131 }, { "epoch": 0.09132, "grad_norm": 0.6894799817711807, "learning_rate": 0.003, "loss": 4.1128, "step": 9132 }, { "epoch": 0.09133, "grad_norm": 0.9164420443264122, "learning_rate": 0.003, "loss": 4.0936, "step": 9133 }, { "epoch": 0.09134, "grad_norm": 1.118137516546724, "learning_rate": 0.003, "loss": 4.0987, "step": 9134 }, { "epoch": 0.09135, "grad_norm": 0.9451104479671788, "learning_rate": 0.003, "loss": 4.1064, "step": 9135 }, { "epoch": 0.09136, "grad_norm": 0.8741863870514021, "learning_rate": 0.003, "loss": 4.0892, "step": 9136 }, { "epoch": 0.09137, "grad_norm": 0.6559657083546983, "learning_rate": 0.003, "loss": 4.1186, "step": 9137 }, { "epoch": 0.09138, "grad_norm": 0.6591621056680503, "learning_rate": 0.003, "loss": 4.085, "step": 9138 }, { "epoch": 0.09139, "grad_norm": 0.7172388233296422, "learning_rate": 0.003, "loss": 4.0864, "step": 9139 }, { "epoch": 0.0914, "grad_norm": 0.74698785265184, "learning_rate": 0.003, "loss": 4.0891, "step": 9140 }, { "epoch": 0.09141, "grad_norm": 0.7383928212536637, "learning_rate": 0.003, "loss": 4.1203, "step": 9141 }, { "epoch": 0.09142, "grad_norm": 0.8365703977523314, "learning_rate": 0.003, "loss": 4.1106, "step": 9142 }, { "epoch": 0.09143, "grad_norm": 0.8481652164716268, "learning_rate": 0.003, "loss": 4.0595, "step": 9143 }, { "epoch": 0.09144, "grad_norm": 0.7795528073061184, "learning_rate": 0.003, "loss": 4.0832, "step": 9144 }, { "epoch": 0.09145, "grad_norm": 0.8962209591026435, "learning_rate": 0.003, "loss": 4.1122, "step": 9145 }, { "epoch": 0.09146, "grad_norm": 0.977738810966034, "learning_rate": 0.003, "loss": 4.1051, "step": 9146 }, { "epoch": 0.09147, "grad_norm": 1.1158176717774442, "learning_rate": 0.003, "loss": 4.0971, "step": 9147 }, { "epoch": 0.09148, "grad_norm": 0.851745175787143, "learning_rate": 0.003, "loss": 4.1069, "step": 9148 }, { "epoch": 0.09149, "grad_norm": 0.7540599482452148, "learning_rate": 0.003, "loss": 4.0494, "step": 9149 }, { "epoch": 0.0915, "grad_norm": 0.8969362552019574, "learning_rate": 0.003, "loss": 4.0603, "step": 9150 }, { "epoch": 0.09151, "grad_norm": 0.9897498994292178, "learning_rate": 0.003, "loss": 4.1018, "step": 9151 }, { "epoch": 0.09152, "grad_norm": 1.0440364187474465, "learning_rate": 0.003, "loss": 4.1119, "step": 9152 }, { "epoch": 0.09153, "grad_norm": 1.1474475308871095, "learning_rate": 0.003, "loss": 4.1149, "step": 9153 }, { "epoch": 0.09154, "grad_norm": 0.9705935043490699, "learning_rate": 0.003, "loss": 4.1128, "step": 9154 }, { "epoch": 0.09155, "grad_norm": 0.9298282598002533, "learning_rate": 0.003, "loss": 4.1163, "step": 9155 }, { "epoch": 0.09156, "grad_norm": 0.9723217314936119, "learning_rate": 0.003, "loss": 4.1139, "step": 9156 }, { "epoch": 0.09157, "grad_norm": 1.0948870894323015, "learning_rate": 0.003, "loss": 4.0749, "step": 9157 }, { "epoch": 0.09158, "grad_norm": 1.082303366161037, "learning_rate": 0.003, "loss": 4.102, "step": 9158 }, { "epoch": 0.09159, "grad_norm": 1.1823492886345546, "learning_rate": 0.003, "loss": 4.1154, "step": 9159 }, { "epoch": 0.0916, "grad_norm": 1.1448872296285921, "learning_rate": 0.003, "loss": 4.0891, "step": 9160 }, { "epoch": 0.09161, "grad_norm": 0.7917308786382252, "learning_rate": 0.003, "loss": 4.0869, "step": 9161 }, { "epoch": 0.09162, "grad_norm": 0.7101340083428053, "learning_rate": 0.003, "loss": 4.1247, "step": 9162 }, { "epoch": 0.09163, "grad_norm": 0.5734667662994228, "learning_rate": 0.003, "loss": 4.0715, "step": 9163 }, { "epoch": 0.09164, "grad_norm": 0.6463742170108849, "learning_rate": 0.003, "loss": 4.0888, "step": 9164 }, { "epoch": 0.09165, "grad_norm": 0.8791425623296205, "learning_rate": 0.003, "loss": 4.1211, "step": 9165 }, { "epoch": 0.09166, "grad_norm": 1.2896040065899281, "learning_rate": 0.003, "loss": 4.1028, "step": 9166 }, { "epoch": 0.09167, "grad_norm": 0.777626739229617, "learning_rate": 0.003, "loss": 4.0827, "step": 9167 }, { "epoch": 0.09168, "grad_norm": 0.6059045588929811, "learning_rate": 0.003, "loss": 4.0705, "step": 9168 }, { "epoch": 0.09169, "grad_norm": 0.6211082886708152, "learning_rate": 0.003, "loss": 4.0999, "step": 9169 }, { "epoch": 0.0917, "grad_norm": 0.6201173250342321, "learning_rate": 0.003, "loss": 4.0755, "step": 9170 }, { "epoch": 0.09171, "grad_norm": 0.65420012036481, "learning_rate": 0.003, "loss": 4.1143, "step": 9171 }, { "epoch": 0.09172, "grad_norm": 0.6193382027516, "learning_rate": 0.003, "loss": 4.1106, "step": 9172 }, { "epoch": 0.09173, "grad_norm": 0.726634103434902, "learning_rate": 0.003, "loss": 4.0827, "step": 9173 }, { "epoch": 0.09174, "grad_norm": 0.75555391022193, "learning_rate": 0.003, "loss": 4.096, "step": 9174 }, { "epoch": 0.09175, "grad_norm": 0.791110324469862, "learning_rate": 0.003, "loss": 4.0913, "step": 9175 }, { "epoch": 0.09176, "grad_norm": 0.785799126594617, "learning_rate": 0.003, "loss": 4.0962, "step": 9176 }, { "epoch": 0.09177, "grad_norm": 0.7612319543811192, "learning_rate": 0.003, "loss": 4.0504, "step": 9177 }, { "epoch": 0.09178, "grad_norm": 0.7565419822439869, "learning_rate": 0.003, "loss": 4.1, "step": 9178 }, { "epoch": 0.09179, "grad_norm": 0.8420544599698648, "learning_rate": 0.003, "loss": 4.0873, "step": 9179 }, { "epoch": 0.0918, "grad_norm": 1.084438958299728, "learning_rate": 0.003, "loss": 4.091, "step": 9180 }, { "epoch": 0.09181, "grad_norm": 1.1059617305324483, "learning_rate": 0.003, "loss": 4.0826, "step": 9181 }, { "epoch": 0.09182, "grad_norm": 1.047379795485612, "learning_rate": 0.003, "loss": 4.0869, "step": 9182 }, { "epoch": 0.09183, "grad_norm": 0.9756788460870816, "learning_rate": 0.003, "loss": 4.0794, "step": 9183 }, { "epoch": 0.09184, "grad_norm": 0.8475124797770287, "learning_rate": 0.003, "loss": 4.0781, "step": 9184 }, { "epoch": 0.09185, "grad_norm": 0.7818162898325053, "learning_rate": 0.003, "loss": 4.1069, "step": 9185 }, { "epoch": 0.09186, "grad_norm": 0.7766932782419979, "learning_rate": 0.003, "loss": 4.05, "step": 9186 }, { "epoch": 0.09187, "grad_norm": 0.8825420516075367, "learning_rate": 0.003, "loss": 4.1197, "step": 9187 }, { "epoch": 0.09188, "grad_norm": 0.9631123401662338, "learning_rate": 0.003, "loss": 4.0989, "step": 9188 }, { "epoch": 0.09189, "grad_norm": 1.0225689776098181, "learning_rate": 0.003, "loss": 4.1105, "step": 9189 }, { "epoch": 0.0919, "grad_norm": 1.0782850500259313, "learning_rate": 0.003, "loss": 4.1264, "step": 9190 }, { "epoch": 0.09191, "grad_norm": 0.9063112295904093, "learning_rate": 0.003, "loss": 4.0957, "step": 9191 }, { "epoch": 0.09192, "grad_norm": 1.055089562560554, "learning_rate": 0.003, "loss": 4.0876, "step": 9192 }, { "epoch": 0.09193, "grad_norm": 1.014344237878195, "learning_rate": 0.003, "loss": 4.0764, "step": 9193 }, { "epoch": 0.09194, "grad_norm": 1.3217721747559106, "learning_rate": 0.003, "loss": 4.1042, "step": 9194 }, { "epoch": 0.09195, "grad_norm": 0.9570521535388152, "learning_rate": 0.003, "loss": 4.0985, "step": 9195 }, { "epoch": 0.09196, "grad_norm": 0.9200244611664627, "learning_rate": 0.003, "loss": 4.1116, "step": 9196 }, { "epoch": 0.09197, "grad_norm": 0.962705740113485, "learning_rate": 0.003, "loss": 4.106, "step": 9197 }, { "epoch": 0.09198, "grad_norm": 1.0994040596641663, "learning_rate": 0.003, "loss": 4.1027, "step": 9198 }, { "epoch": 0.09199, "grad_norm": 0.9897723274209836, "learning_rate": 0.003, "loss": 4.112, "step": 9199 }, { "epoch": 0.092, "grad_norm": 0.9542387690632056, "learning_rate": 0.003, "loss": 4.1077, "step": 9200 }, { "epoch": 0.09201, "grad_norm": 1.073948721463555, "learning_rate": 0.003, "loss": 4.0998, "step": 9201 }, { "epoch": 0.09202, "grad_norm": 0.9997228282811497, "learning_rate": 0.003, "loss": 4.1149, "step": 9202 }, { "epoch": 0.09203, "grad_norm": 0.8556418820489938, "learning_rate": 0.003, "loss": 4.1072, "step": 9203 }, { "epoch": 0.09204, "grad_norm": 0.7789051082427513, "learning_rate": 0.003, "loss": 4.1096, "step": 9204 }, { "epoch": 0.09205, "grad_norm": 0.7717492134597749, "learning_rate": 0.003, "loss": 4.0978, "step": 9205 }, { "epoch": 0.09206, "grad_norm": 0.8428849907020309, "learning_rate": 0.003, "loss": 4.107, "step": 9206 }, { "epoch": 0.09207, "grad_norm": 0.9040648516741944, "learning_rate": 0.003, "loss": 4.1254, "step": 9207 }, { "epoch": 0.09208, "grad_norm": 1.0913270867403186, "learning_rate": 0.003, "loss": 4.0903, "step": 9208 }, { "epoch": 0.09209, "grad_norm": 1.0048770200215928, "learning_rate": 0.003, "loss": 4.0869, "step": 9209 }, { "epoch": 0.0921, "grad_norm": 1.0392328935432598, "learning_rate": 0.003, "loss": 4.116, "step": 9210 }, { "epoch": 0.09211, "grad_norm": 0.9204701850435161, "learning_rate": 0.003, "loss": 4.0902, "step": 9211 }, { "epoch": 0.09212, "grad_norm": 0.9402955316963719, "learning_rate": 0.003, "loss": 4.1128, "step": 9212 }, { "epoch": 0.09213, "grad_norm": 1.048761874355027, "learning_rate": 0.003, "loss": 4.0984, "step": 9213 }, { "epoch": 0.09214, "grad_norm": 1.0562819020868857, "learning_rate": 0.003, "loss": 4.084, "step": 9214 }, { "epoch": 0.09215, "grad_norm": 1.085719329634591, "learning_rate": 0.003, "loss": 4.0949, "step": 9215 }, { "epoch": 0.09216, "grad_norm": 0.9924362432989838, "learning_rate": 0.003, "loss": 4.1003, "step": 9216 }, { "epoch": 0.09217, "grad_norm": 0.9416068771195782, "learning_rate": 0.003, "loss": 4.0615, "step": 9217 }, { "epoch": 0.09218, "grad_norm": 0.9216157764389595, "learning_rate": 0.003, "loss": 4.1165, "step": 9218 }, { "epoch": 0.09219, "grad_norm": 1.0578289251402284, "learning_rate": 0.003, "loss": 4.1044, "step": 9219 }, { "epoch": 0.0922, "grad_norm": 0.8494550851933941, "learning_rate": 0.003, "loss": 4.0996, "step": 9220 }, { "epoch": 0.09221, "grad_norm": 0.7004867483817171, "learning_rate": 0.003, "loss": 4.1055, "step": 9221 }, { "epoch": 0.09222, "grad_norm": 0.7348105532675423, "learning_rate": 0.003, "loss": 4.0927, "step": 9222 }, { "epoch": 0.09223, "grad_norm": 0.9417547593730853, "learning_rate": 0.003, "loss": 4.094, "step": 9223 }, { "epoch": 0.09224, "grad_norm": 1.049648042435812, "learning_rate": 0.003, "loss": 4.1138, "step": 9224 }, { "epoch": 0.09225, "grad_norm": 1.0276279527925067, "learning_rate": 0.003, "loss": 4.0919, "step": 9225 }, { "epoch": 0.09226, "grad_norm": 0.9585527602924722, "learning_rate": 0.003, "loss": 4.076, "step": 9226 }, { "epoch": 0.09227, "grad_norm": 0.9337166610853146, "learning_rate": 0.003, "loss": 4.0935, "step": 9227 }, { "epoch": 0.09228, "grad_norm": 0.9027418181317508, "learning_rate": 0.003, "loss": 4.0947, "step": 9228 }, { "epoch": 0.09229, "grad_norm": 0.9198727309478711, "learning_rate": 0.003, "loss": 4.1011, "step": 9229 }, { "epoch": 0.0923, "grad_norm": 0.9045649479252982, "learning_rate": 0.003, "loss": 4.0985, "step": 9230 }, { "epoch": 0.09231, "grad_norm": 0.9974652662909131, "learning_rate": 0.003, "loss": 4.0971, "step": 9231 }, { "epoch": 0.09232, "grad_norm": 1.0912379899673035, "learning_rate": 0.003, "loss": 4.1094, "step": 9232 }, { "epoch": 0.09233, "grad_norm": 1.0915766025748115, "learning_rate": 0.003, "loss": 4.1243, "step": 9233 }, { "epoch": 0.09234, "grad_norm": 0.9888878322311199, "learning_rate": 0.003, "loss": 4.1278, "step": 9234 }, { "epoch": 0.09235, "grad_norm": 0.8256741524071066, "learning_rate": 0.003, "loss": 4.0888, "step": 9235 }, { "epoch": 0.09236, "grad_norm": 0.7623729808775097, "learning_rate": 0.003, "loss": 4.1281, "step": 9236 }, { "epoch": 0.09237, "grad_norm": 0.784646386208245, "learning_rate": 0.003, "loss": 4.087, "step": 9237 }, { "epoch": 0.09238, "grad_norm": 0.781447671218314, "learning_rate": 0.003, "loss": 4.074, "step": 9238 }, { "epoch": 0.09239, "grad_norm": 0.7580100064874866, "learning_rate": 0.003, "loss": 4.0791, "step": 9239 }, { "epoch": 0.0924, "grad_norm": 0.7887138742063122, "learning_rate": 0.003, "loss": 4.0837, "step": 9240 }, { "epoch": 0.09241, "grad_norm": 0.9232343150935588, "learning_rate": 0.003, "loss": 4.0991, "step": 9241 }, { "epoch": 0.09242, "grad_norm": 1.1881392195909175, "learning_rate": 0.003, "loss": 4.1256, "step": 9242 }, { "epoch": 0.09243, "grad_norm": 0.9840929580945444, "learning_rate": 0.003, "loss": 4.12, "step": 9243 }, { "epoch": 0.09244, "grad_norm": 1.304430856896462, "learning_rate": 0.003, "loss": 4.1126, "step": 9244 }, { "epoch": 0.09245, "grad_norm": 0.9351259127699327, "learning_rate": 0.003, "loss": 4.0863, "step": 9245 }, { "epoch": 0.09246, "grad_norm": 0.9793836268379075, "learning_rate": 0.003, "loss": 4.1026, "step": 9246 }, { "epoch": 0.09247, "grad_norm": 0.9197456699785576, "learning_rate": 0.003, "loss": 4.0905, "step": 9247 }, { "epoch": 0.09248, "grad_norm": 0.8196313202466566, "learning_rate": 0.003, "loss": 4.0982, "step": 9248 }, { "epoch": 0.09249, "grad_norm": 0.7928866730787516, "learning_rate": 0.003, "loss": 4.1449, "step": 9249 }, { "epoch": 0.0925, "grad_norm": 0.7531883062490411, "learning_rate": 0.003, "loss": 4.1114, "step": 9250 }, { "epoch": 0.09251, "grad_norm": 0.7666898332256793, "learning_rate": 0.003, "loss": 4.1092, "step": 9251 }, { "epoch": 0.09252, "grad_norm": 0.7988522492516801, "learning_rate": 0.003, "loss": 4.1198, "step": 9252 }, { "epoch": 0.09253, "grad_norm": 0.7491102789906501, "learning_rate": 0.003, "loss": 4.0904, "step": 9253 }, { "epoch": 0.09254, "grad_norm": 0.6784653406918104, "learning_rate": 0.003, "loss": 4.1062, "step": 9254 }, { "epoch": 0.09255, "grad_norm": 0.5654398621536241, "learning_rate": 0.003, "loss": 4.0643, "step": 9255 }, { "epoch": 0.09256, "grad_norm": 0.6643608382759081, "learning_rate": 0.003, "loss": 4.0957, "step": 9256 }, { "epoch": 0.09257, "grad_norm": 0.7193700387398151, "learning_rate": 0.003, "loss": 4.0907, "step": 9257 }, { "epoch": 0.09258, "grad_norm": 0.8759808055403874, "learning_rate": 0.003, "loss": 4.0794, "step": 9258 }, { "epoch": 0.09259, "grad_norm": 1.2012930245710616, "learning_rate": 0.003, "loss": 4.1075, "step": 9259 }, { "epoch": 0.0926, "grad_norm": 0.7418114937003976, "learning_rate": 0.003, "loss": 4.0931, "step": 9260 }, { "epoch": 0.09261, "grad_norm": 0.5474250675303859, "learning_rate": 0.003, "loss": 4.0658, "step": 9261 }, { "epoch": 0.09262, "grad_norm": 0.6829621284506816, "learning_rate": 0.003, "loss": 4.0698, "step": 9262 }, { "epoch": 0.09263, "grad_norm": 0.7768878158809396, "learning_rate": 0.003, "loss": 4.0916, "step": 9263 }, { "epoch": 0.09264, "grad_norm": 0.9996107855545961, "learning_rate": 0.003, "loss": 4.1261, "step": 9264 }, { "epoch": 0.09265, "grad_norm": 1.163827375444901, "learning_rate": 0.003, "loss": 4.0995, "step": 9265 }, { "epoch": 0.09266, "grad_norm": 0.7349870746113485, "learning_rate": 0.003, "loss": 4.0908, "step": 9266 }, { "epoch": 0.09267, "grad_norm": 0.6853508114179676, "learning_rate": 0.003, "loss": 4.07, "step": 9267 }, { "epoch": 0.09268, "grad_norm": 0.6333065813900194, "learning_rate": 0.003, "loss": 4.0733, "step": 9268 }, { "epoch": 0.09269, "grad_norm": 0.7044393984561579, "learning_rate": 0.003, "loss": 4.0822, "step": 9269 }, { "epoch": 0.0927, "grad_norm": 0.7829797104270343, "learning_rate": 0.003, "loss": 4.1113, "step": 9270 }, { "epoch": 0.09271, "grad_norm": 0.8257575052405663, "learning_rate": 0.003, "loss": 4.0963, "step": 9271 }, { "epoch": 0.09272, "grad_norm": 0.9439694552484319, "learning_rate": 0.003, "loss": 4.0839, "step": 9272 }, { "epoch": 0.09273, "grad_norm": 1.0602176422418417, "learning_rate": 0.003, "loss": 4.1077, "step": 9273 }, { "epoch": 0.09274, "grad_norm": 0.9858527050122837, "learning_rate": 0.003, "loss": 4.0971, "step": 9274 }, { "epoch": 0.09275, "grad_norm": 0.9316290417225153, "learning_rate": 0.003, "loss": 4.1187, "step": 9275 }, { "epoch": 0.09276, "grad_norm": 0.9228744715959912, "learning_rate": 0.003, "loss": 4.1045, "step": 9276 }, { "epoch": 0.09277, "grad_norm": 0.97661764276748, "learning_rate": 0.003, "loss": 4.1017, "step": 9277 }, { "epoch": 0.09278, "grad_norm": 0.9667223874049936, "learning_rate": 0.003, "loss": 4.0726, "step": 9278 }, { "epoch": 0.09279, "grad_norm": 1.0509045802580743, "learning_rate": 0.003, "loss": 4.1021, "step": 9279 }, { "epoch": 0.0928, "grad_norm": 1.199654598508013, "learning_rate": 0.003, "loss": 4.1025, "step": 9280 }, { "epoch": 0.09281, "grad_norm": 0.8190040066327821, "learning_rate": 0.003, "loss": 4.0984, "step": 9281 }, { "epoch": 0.09282, "grad_norm": 0.8005381258667182, "learning_rate": 0.003, "loss": 4.1265, "step": 9282 }, { "epoch": 0.09283, "grad_norm": 0.9731528357234364, "learning_rate": 0.003, "loss": 4.121, "step": 9283 }, { "epoch": 0.09284, "grad_norm": 1.0699122649133415, "learning_rate": 0.003, "loss": 4.1056, "step": 9284 }, { "epoch": 0.09285, "grad_norm": 0.9736646539104704, "learning_rate": 0.003, "loss": 4.0955, "step": 9285 }, { "epoch": 0.09286, "grad_norm": 0.7534181301780545, "learning_rate": 0.003, "loss": 4.1069, "step": 9286 }, { "epoch": 0.09287, "grad_norm": 0.7039247323481843, "learning_rate": 0.003, "loss": 4.1049, "step": 9287 }, { "epoch": 0.09288, "grad_norm": 0.6153847411143628, "learning_rate": 0.003, "loss": 4.1008, "step": 9288 }, { "epoch": 0.09289, "grad_norm": 0.7439491494868004, "learning_rate": 0.003, "loss": 4.0733, "step": 9289 }, { "epoch": 0.0929, "grad_norm": 1.0211178389308402, "learning_rate": 0.003, "loss": 4.1044, "step": 9290 }, { "epoch": 0.09291, "grad_norm": 1.2912396043684187, "learning_rate": 0.003, "loss": 4.1027, "step": 9291 }, { "epoch": 0.09292, "grad_norm": 1.0329590152181434, "learning_rate": 0.003, "loss": 4.115, "step": 9292 }, { "epoch": 0.09293, "grad_norm": 0.925671296507913, "learning_rate": 0.003, "loss": 4.0791, "step": 9293 }, { "epoch": 0.09294, "grad_norm": 0.8753174570663704, "learning_rate": 0.003, "loss": 4.0968, "step": 9294 }, { "epoch": 0.09295, "grad_norm": 0.7750429431472656, "learning_rate": 0.003, "loss": 4.0975, "step": 9295 }, { "epoch": 0.09296, "grad_norm": 0.8498716958126399, "learning_rate": 0.003, "loss": 4.084, "step": 9296 }, { "epoch": 0.09297, "grad_norm": 0.9547075919931944, "learning_rate": 0.003, "loss": 4.0993, "step": 9297 }, { "epoch": 0.09298, "grad_norm": 1.0547931478455412, "learning_rate": 0.003, "loss": 4.1016, "step": 9298 }, { "epoch": 0.09299, "grad_norm": 0.9874863882567291, "learning_rate": 0.003, "loss": 4.0972, "step": 9299 }, { "epoch": 0.093, "grad_norm": 1.0320217787422357, "learning_rate": 0.003, "loss": 4.1013, "step": 9300 }, { "epoch": 0.09301, "grad_norm": 0.868256523126305, "learning_rate": 0.003, "loss": 4.0922, "step": 9301 }, { "epoch": 0.09302, "grad_norm": 0.8911189359509369, "learning_rate": 0.003, "loss": 4.1232, "step": 9302 }, { "epoch": 0.09303, "grad_norm": 1.1184167860096568, "learning_rate": 0.003, "loss": 4.1016, "step": 9303 }, { "epoch": 0.09304, "grad_norm": 1.0616222963872175, "learning_rate": 0.003, "loss": 4.0933, "step": 9304 }, { "epoch": 0.09305, "grad_norm": 1.0167967680320442, "learning_rate": 0.003, "loss": 4.0971, "step": 9305 }, { "epoch": 0.09306, "grad_norm": 0.9100621741165005, "learning_rate": 0.003, "loss": 4.1025, "step": 9306 }, { "epoch": 0.09307, "grad_norm": 0.7277198010566454, "learning_rate": 0.003, "loss": 4.0987, "step": 9307 }, { "epoch": 0.09308, "grad_norm": 0.7642184186042998, "learning_rate": 0.003, "loss": 4.0818, "step": 9308 }, { "epoch": 0.09309, "grad_norm": 0.7898375897779486, "learning_rate": 0.003, "loss": 4.1093, "step": 9309 }, { "epoch": 0.0931, "grad_norm": 0.8313627098153431, "learning_rate": 0.003, "loss": 4.1156, "step": 9310 }, { "epoch": 0.09311, "grad_norm": 0.9180361447599314, "learning_rate": 0.003, "loss": 4.1021, "step": 9311 }, { "epoch": 0.09312, "grad_norm": 1.0722183150254507, "learning_rate": 0.003, "loss": 4.1053, "step": 9312 }, { "epoch": 0.09313, "grad_norm": 1.0053080965881522, "learning_rate": 0.003, "loss": 4.1345, "step": 9313 }, { "epoch": 0.09314, "grad_norm": 1.0429150067918742, "learning_rate": 0.003, "loss": 4.1065, "step": 9314 }, { "epoch": 0.09315, "grad_norm": 1.0275188761589462, "learning_rate": 0.003, "loss": 4.1035, "step": 9315 }, { "epoch": 0.09316, "grad_norm": 0.9852252098429989, "learning_rate": 0.003, "loss": 4.1047, "step": 9316 }, { "epoch": 0.09317, "grad_norm": 1.0943356019238426, "learning_rate": 0.003, "loss": 4.1259, "step": 9317 }, { "epoch": 0.09318, "grad_norm": 0.8892700832028245, "learning_rate": 0.003, "loss": 4.0911, "step": 9318 }, { "epoch": 0.09319, "grad_norm": 0.8440332983350739, "learning_rate": 0.003, "loss": 4.1321, "step": 9319 }, { "epoch": 0.0932, "grad_norm": 0.9136370265528279, "learning_rate": 0.003, "loss": 4.1065, "step": 9320 }, { "epoch": 0.09321, "grad_norm": 1.0652363081449419, "learning_rate": 0.003, "loss": 4.1019, "step": 9321 }, { "epoch": 0.09322, "grad_norm": 0.9657802025171371, "learning_rate": 0.003, "loss": 4.0739, "step": 9322 }, { "epoch": 0.09323, "grad_norm": 0.9541349646760839, "learning_rate": 0.003, "loss": 4.1003, "step": 9323 }, { "epoch": 0.09324, "grad_norm": 0.940690991603989, "learning_rate": 0.003, "loss": 4.1185, "step": 9324 }, { "epoch": 0.09325, "grad_norm": 0.9773641494477271, "learning_rate": 0.003, "loss": 4.0676, "step": 9325 }, { "epoch": 0.09326, "grad_norm": 1.1123850382527165, "learning_rate": 0.003, "loss": 4.0887, "step": 9326 }, { "epoch": 0.09327, "grad_norm": 0.8756313815561116, "learning_rate": 0.003, "loss": 4.0956, "step": 9327 }, { "epoch": 0.09328, "grad_norm": 0.7389172317899884, "learning_rate": 0.003, "loss": 4.0941, "step": 9328 }, { "epoch": 0.09329, "grad_norm": 0.6983046686762022, "learning_rate": 0.003, "loss": 4.0969, "step": 9329 }, { "epoch": 0.0933, "grad_norm": 0.6752597877286427, "learning_rate": 0.003, "loss": 4.1111, "step": 9330 }, { "epoch": 0.09331, "grad_norm": 0.8385408030611338, "learning_rate": 0.003, "loss": 4.081, "step": 9331 }, { "epoch": 0.09332, "grad_norm": 0.92425769636424, "learning_rate": 0.003, "loss": 4.1071, "step": 9332 }, { "epoch": 0.09333, "grad_norm": 1.1149373386342603, "learning_rate": 0.003, "loss": 4.0875, "step": 9333 }, { "epoch": 0.09334, "grad_norm": 0.9061499154659839, "learning_rate": 0.003, "loss": 4.1063, "step": 9334 }, { "epoch": 0.09335, "grad_norm": 0.7814181455603038, "learning_rate": 0.003, "loss": 4.0905, "step": 9335 }, { "epoch": 0.09336, "grad_norm": 0.6185969547295879, "learning_rate": 0.003, "loss": 4.0753, "step": 9336 }, { "epoch": 0.09337, "grad_norm": 0.570098829353087, "learning_rate": 0.003, "loss": 4.091, "step": 9337 }, { "epoch": 0.09338, "grad_norm": 0.6307398718763437, "learning_rate": 0.003, "loss": 4.0956, "step": 9338 }, { "epoch": 0.09339, "grad_norm": 0.5876765823170635, "learning_rate": 0.003, "loss": 4.0708, "step": 9339 }, { "epoch": 0.0934, "grad_norm": 0.6057455120022778, "learning_rate": 0.003, "loss": 4.0634, "step": 9340 }, { "epoch": 0.09341, "grad_norm": 0.7819289757235965, "learning_rate": 0.003, "loss": 4.0898, "step": 9341 }, { "epoch": 0.09342, "grad_norm": 1.0525239903626875, "learning_rate": 0.003, "loss": 4.0861, "step": 9342 }, { "epoch": 0.09343, "grad_norm": 0.922797767764872, "learning_rate": 0.003, "loss": 4.0887, "step": 9343 }, { "epoch": 0.09344, "grad_norm": 0.7651325270309342, "learning_rate": 0.003, "loss": 4.0864, "step": 9344 }, { "epoch": 0.09345, "grad_norm": 0.8307201489790069, "learning_rate": 0.003, "loss": 4.1062, "step": 9345 }, { "epoch": 0.09346, "grad_norm": 0.9268559341614948, "learning_rate": 0.003, "loss": 4.0995, "step": 9346 }, { "epoch": 0.09347, "grad_norm": 0.8843815142269438, "learning_rate": 0.003, "loss": 4.0832, "step": 9347 }, { "epoch": 0.09348, "grad_norm": 0.7511422034725228, "learning_rate": 0.003, "loss": 4.1036, "step": 9348 }, { "epoch": 0.09349, "grad_norm": 0.7681336246456821, "learning_rate": 0.003, "loss": 4.0989, "step": 9349 }, { "epoch": 0.0935, "grad_norm": 0.8391732024120159, "learning_rate": 0.003, "loss": 4.0878, "step": 9350 }, { "epoch": 0.09351, "grad_norm": 0.8572535723613135, "learning_rate": 0.003, "loss": 4.0611, "step": 9351 }, { "epoch": 0.09352, "grad_norm": 1.1345129108011323, "learning_rate": 0.003, "loss": 4.0954, "step": 9352 }, { "epoch": 0.09353, "grad_norm": 1.188322335200505, "learning_rate": 0.003, "loss": 4.0725, "step": 9353 }, { "epoch": 0.09354, "grad_norm": 0.7871694631960734, "learning_rate": 0.003, "loss": 4.0855, "step": 9354 }, { "epoch": 0.09355, "grad_norm": 0.7942202356512671, "learning_rate": 0.003, "loss": 4.109, "step": 9355 }, { "epoch": 0.09356, "grad_norm": 0.8720300671130099, "learning_rate": 0.003, "loss": 4.095, "step": 9356 }, { "epoch": 0.09357, "grad_norm": 0.9400770356953755, "learning_rate": 0.003, "loss": 4.0942, "step": 9357 }, { "epoch": 0.09358, "grad_norm": 0.872908592359249, "learning_rate": 0.003, "loss": 4.0944, "step": 9358 }, { "epoch": 0.09359, "grad_norm": 0.9729065406434361, "learning_rate": 0.003, "loss": 4.1064, "step": 9359 }, { "epoch": 0.0936, "grad_norm": 1.4002450511532056, "learning_rate": 0.003, "loss": 4.1089, "step": 9360 }, { "epoch": 0.09361, "grad_norm": 0.8007667671323881, "learning_rate": 0.003, "loss": 4.1014, "step": 9361 }, { "epoch": 0.09362, "grad_norm": 0.8253755648015588, "learning_rate": 0.003, "loss": 4.1318, "step": 9362 }, { "epoch": 0.09363, "grad_norm": 0.8603986662115061, "learning_rate": 0.003, "loss": 4.12, "step": 9363 }, { "epoch": 0.09364, "grad_norm": 0.8888437458688937, "learning_rate": 0.003, "loss": 4.089, "step": 9364 }, { "epoch": 0.09365, "grad_norm": 1.062809120441484, "learning_rate": 0.003, "loss": 4.1185, "step": 9365 }, { "epoch": 0.09366, "grad_norm": 0.9302023322409313, "learning_rate": 0.003, "loss": 4.069, "step": 9366 }, { "epoch": 0.09367, "grad_norm": 1.1396009232198598, "learning_rate": 0.003, "loss": 4.1054, "step": 9367 }, { "epoch": 0.09368, "grad_norm": 0.9961165393209213, "learning_rate": 0.003, "loss": 4.1149, "step": 9368 }, { "epoch": 0.09369, "grad_norm": 0.9522147427617241, "learning_rate": 0.003, "loss": 4.0764, "step": 9369 }, { "epoch": 0.0937, "grad_norm": 1.0784020973346404, "learning_rate": 0.003, "loss": 4.1296, "step": 9370 }, { "epoch": 0.09371, "grad_norm": 1.1071187911940317, "learning_rate": 0.003, "loss": 4.1135, "step": 9371 }, { "epoch": 0.09372, "grad_norm": 0.94833464425284, "learning_rate": 0.003, "loss": 4.0895, "step": 9372 }, { "epoch": 0.09373, "grad_norm": 0.9480880293079869, "learning_rate": 0.003, "loss": 4.1405, "step": 9373 }, { "epoch": 0.09374, "grad_norm": 0.994929884790511, "learning_rate": 0.003, "loss": 4.1413, "step": 9374 }, { "epoch": 0.09375, "grad_norm": 0.988811266369445, "learning_rate": 0.003, "loss": 4.0794, "step": 9375 }, { "epoch": 0.09376, "grad_norm": 0.9324886958686643, "learning_rate": 0.003, "loss": 4.124, "step": 9376 }, { "epoch": 0.09377, "grad_norm": 0.9548553533271001, "learning_rate": 0.003, "loss": 4.1048, "step": 9377 }, { "epoch": 0.09378, "grad_norm": 0.9562248687261611, "learning_rate": 0.003, "loss": 4.1138, "step": 9378 }, { "epoch": 0.09379, "grad_norm": 1.0222988140200817, "learning_rate": 0.003, "loss": 4.0804, "step": 9379 }, { "epoch": 0.0938, "grad_norm": 0.9124887754159823, "learning_rate": 0.003, "loss": 4.0815, "step": 9380 }, { "epoch": 0.09381, "grad_norm": 1.037020146160175, "learning_rate": 0.003, "loss": 4.0908, "step": 9381 }, { "epoch": 0.09382, "grad_norm": 0.9546631751003297, "learning_rate": 0.003, "loss": 4.1103, "step": 9382 }, { "epoch": 0.09383, "grad_norm": 1.040184676131133, "learning_rate": 0.003, "loss": 4.0971, "step": 9383 }, { "epoch": 0.09384, "grad_norm": 1.029833944161637, "learning_rate": 0.003, "loss": 4.1261, "step": 9384 }, { "epoch": 0.09385, "grad_norm": 0.899541831452752, "learning_rate": 0.003, "loss": 4.103, "step": 9385 }, { "epoch": 0.09386, "grad_norm": 0.8629312944127356, "learning_rate": 0.003, "loss": 4.0716, "step": 9386 }, { "epoch": 0.09387, "grad_norm": 1.0409426461274158, "learning_rate": 0.003, "loss": 4.1109, "step": 9387 }, { "epoch": 0.09388, "grad_norm": 1.006589469006825, "learning_rate": 0.003, "loss": 4.1055, "step": 9388 }, { "epoch": 0.09389, "grad_norm": 1.1077114456174915, "learning_rate": 0.003, "loss": 4.0839, "step": 9389 }, { "epoch": 0.0939, "grad_norm": 1.011899263145205, "learning_rate": 0.003, "loss": 4.111, "step": 9390 }, { "epoch": 0.09391, "grad_norm": 0.8545868658356651, "learning_rate": 0.003, "loss": 4.0882, "step": 9391 }, { "epoch": 0.09392, "grad_norm": 0.7530450948177452, "learning_rate": 0.003, "loss": 4.1191, "step": 9392 }, { "epoch": 0.09393, "grad_norm": 0.8389311586343143, "learning_rate": 0.003, "loss": 4.108, "step": 9393 }, { "epoch": 0.09394, "grad_norm": 0.8905715644273344, "learning_rate": 0.003, "loss": 4.0956, "step": 9394 }, { "epoch": 0.09395, "grad_norm": 0.9931804256938302, "learning_rate": 0.003, "loss": 4.1021, "step": 9395 }, { "epoch": 0.09396, "grad_norm": 1.1886533323261048, "learning_rate": 0.003, "loss": 4.1355, "step": 9396 }, { "epoch": 0.09397, "grad_norm": 0.8490709939667962, "learning_rate": 0.003, "loss": 4.0824, "step": 9397 }, { "epoch": 0.09398, "grad_norm": 0.9054704841108464, "learning_rate": 0.003, "loss": 4.1157, "step": 9398 }, { "epoch": 0.09399, "grad_norm": 0.8063846844294164, "learning_rate": 0.003, "loss": 4.1027, "step": 9399 }, { "epoch": 0.094, "grad_norm": 0.69451549125735, "learning_rate": 0.003, "loss": 4.0933, "step": 9400 }, { "epoch": 0.09401, "grad_norm": 0.6661444422351755, "learning_rate": 0.003, "loss": 4.1062, "step": 9401 }, { "epoch": 0.09402, "grad_norm": 0.7717530463401683, "learning_rate": 0.003, "loss": 4.0935, "step": 9402 }, { "epoch": 0.09403, "grad_norm": 0.9162649629518274, "learning_rate": 0.003, "loss": 4.117, "step": 9403 }, { "epoch": 0.09404, "grad_norm": 1.0198930452293649, "learning_rate": 0.003, "loss": 4.0793, "step": 9404 }, { "epoch": 0.09405, "grad_norm": 0.9665735513307437, "learning_rate": 0.003, "loss": 4.1156, "step": 9405 }, { "epoch": 0.09406, "grad_norm": 0.9242295093220471, "learning_rate": 0.003, "loss": 4.1116, "step": 9406 }, { "epoch": 0.09407, "grad_norm": 0.8899999098808963, "learning_rate": 0.003, "loss": 4.0989, "step": 9407 }, { "epoch": 0.09408, "grad_norm": 0.7998331174379482, "learning_rate": 0.003, "loss": 4.0791, "step": 9408 }, { "epoch": 0.09409, "grad_norm": 0.7813089937027473, "learning_rate": 0.003, "loss": 4.1042, "step": 9409 }, { "epoch": 0.0941, "grad_norm": 0.8667120250947528, "learning_rate": 0.003, "loss": 4.1145, "step": 9410 }, { "epoch": 0.09411, "grad_norm": 0.9269402848773428, "learning_rate": 0.003, "loss": 4.1221, "step": 9411 }, { "epoch": 0.09412, "grad_norm": 0.9835401553473229, "learning_rate": 0.003, "loss": 4.1174, "step": 9412 }, { "epoch": 0.09413, "grad_norm": 0.9172083233778977, "learning_rate": 0.003, "loss": 4.0894, "step": 9413 }, { "epoch": 0.09414, "grad_norm": 1.006134207979243, "learning_rate": 0.003, "loss": 4.1099, "step": 9414 }, { "epoch": 0.09415, "grad_norm": 1.132729901318295, "learning_rate": 0.003, "loss": 4.08, "step": 9415 }, { "epoch": 0.09416, "grad_norm": 0.8404655613518899, "learning_rate": 0.003, "loss": 4.0507, "step": 9416 }, { "epoch": 0.09417, "grad_norm": 0.7194877963786109, "learning_rate": 0.003, "loss": 4.0823, "step": 9417 }, { "epoch": 0.09418, "grad_norm": 0.7730057932177794, "learning_rate": 0.003, "loss": 4.0817, "step": 9418 }, { "epoch": 0.09419, "grad_norm": 1.1292343134560585, "learning_rate": 0.003, "loss": 4.1207, "step": 9419 }, { "epoch": 0.0942, "grad_norm": 1.1774763095295602, "learning_rate": 0.003, "loss": 4.0976, "step": 9420 }, { "epoch": 0.09421, "grad_norm": 0.7857554926583439, "learning_rate": 0.003, "loss": 4.0891, "step": 9421 }, { "epoch": 0.09422, "grad_norm": 0.7323939073029049, "learning_rate": 0.003, "loss": 4.0925, "step": 9422 }, { "epoch": 0.09423, "grad_norm": 0.7380592179333626, "learning_rate": 0.003, "loss": 4.0374, "step": 9423 }, { "epoch": 0.09424, "grad_norm": 0.811775438330701, "learning_rate": 0.003, "loss": 4.0955, "step": 9424 }, { "epoch": 0.09425, "grad_norm": 0.7513687697939088, "learning_rate": 0.003, "loss": 4.1115, "step": 9425 }, { "epoch": 0.09426, "grad_norm": 0.7219130870944211, "learning_rate": 0.003, "loss": 4.1169, "step": 9426 }, { "epoch": 0.09427, "grad_norm": 0.7513581740192289, "learning_rate": 0.003, "loss": 4.1019, "step": 9427 }, { "epoch": 0.09428, "grad_norm": 0.8363301820337448, "learning_rate": 0.003, "loss": 4.1118, "step": 9428 }, { "epoch": 0.09429, "grad_norm": 0.8654532174390273, "learning_rate": 0.003, "loss": 4.0784, "step": 9429 }, { "epoch": 0.0943, "grad_norm": 1.071277664835941, "learning_rate": 0.003, "loss": 4.0947, "step": 9430 }, { "epoch": 0.09431, "grad_norm": 1.2678205157799658, "learning_rate": 0.003, "loss": 4.1126, "step": 9431 }, { "epoch": 0.09432, "grad_norm": 0.754680518969369, "learning_rate": 0.003, "loss": 4.0858, "step": 9432 }, { "epoch": 0.09433, "grad_norm": 0.8341523731988698, "learning_rate": 0.003, "loss": 4.09, "step": 9433 }, { "epoch": 0.09434, "grad_norm": 0.9342603548990726, "learning_rate": 0.003, "loss": 4.0668, "step": 9434 }, { "epoch": 0.09435, "grad_norm": 0.9289091636637407, "learning_rate": 0.003, "loss": 4.0882, "step": 9435 }, { "epoch": 0.09436, "grad_norm": 1.107766105780556, "learning_rate": 0.003, "loss": 4.0718, "step": 9436 }, { "epoch": 0.09437, "grad_norm": 0.9085956133024019, "learning_rate": 0.003, "loss": 4.0912, "step": 9437 }, { "epoch": 0.09438, "grad_norm": 0.7903225243057821, "learning_rate": 0.003, "loss": 4.1089, "step": 9438 }, { "epoch": 0.09439, "grad_norm": 0.8100969592812339, "learning_rate": 0.003, "loss": 4.1025, "step": 9439 }, { "epoch": 0.0944, "grad_norm": 0.9111468297674603, "learning_rate": 0.003, "loss": 4.0863, "step": 9440 }, { "epoch": 0.09441, "grad_norm": 0.8331316815484991, "learning_rate": 0.003, "loss": 4.0926, "step": 9441 }, { "epoch": 0.09442, "grad_norm": 0.870678637091265, "learning_rate": 0.003, "loss": 4.0857, "step": 9442 }, { "epoch": 0.09443, "grad_norm": 0.9311560253694179, "learning_rate": 0.003, "loss": 4.1204, "step": 9443 }, { "epoch": 0.09444, "grad_norm": 1.0214536095553628, "learning_rate": 0.003, "loss": 4.111, "step": 9444 }, { "epoch": 0.09445, "grad_norm": 1.1766483143623587, "learning_rate": 0.003, "loss": 4.1068, "step": 9445 }, { "epoch": 0.09446, "grad_norm": 0.9519894255979084, "learning_rate": 0.003, "loss": 4.0924, "step": 9446 }, { "epoch": 0.09447, "grad_norm": 0.9198353317350884, "learning_rate": 0.003, "loss": 4.1122, "step": 9447 }, { "epoch": 0.09448, "grad_norm": 1.05453779469519, "learning_rate": 0.003, "loss": 4.1114, "step": 9448 }, { "epoch": 0.09449, "grad_norm": 1.0266876101640614, "learning_rate": 0.003, "loss": 4.1013, "step": 9449 }, { "epoch": 0.0945, "grad_norm": 1.029243760537115, "learning_rate": 0.003, "loss": 4.0999, "step": 9450 }, { "epoch": 0.09451, "grad_norm": 1.0088314651093355, "learning_rate": 0.003, "loss": 4.1092, "step": 9451 }, { "epoch": 0.09452, "grad_norm": 0.8598300778559472, "learning_rate": 0.003, "loss": 4.1151, "step": 9452 }, { "epoch": 0.09453, "grad_norm": 0.8624461571472201, "learning_rate": 0.003, "loss": 4.0594, "step": 9453 }, { "epoch": 0.09454, "grad_norm": 1.0851623978196565, "learning_rate": 0.003, "loss": 4.086, "step": 9454 }, { "epoch": 0.09455, "grad_norm": 1.1706846745804926, "learning_rate": 0.003, "loss": 4.0997, "step": 9455 }, { "epoch": 0.09456, "grad_norm": 0.720398550854311, "learning_rate": 0.003, "loss": 4.0894, "step": 9456 }, { "epoch": 0.09457, "grad_norm": 0.6428844036374872, "learning_rate": 0.003, "loss": 4.1011, "step": 9457 }, { "epoch": 0.09458, "grad_norm": 0.6854122980561818, "learning_rate": 0.003, "loss": 4.0808, "step": 9458 }, { "epoch": 0.09459, "grad_norm": 0.7557470276669376, "learning_rate": 0.003, "loss": 4.1019, "step": 9459 }, { "epoch": 0.0946, "grad_norm": 0.9400745909919421, "learning_rate": 0.003, "loss": 4.1177, "step": 9460 }, { "epoch": 0.09461, "grad_norm": 0.8272490781668856, "learning_rate": 0.003, "loss": 4.1247, "step": 9461 }, { "epoch": 0.09462, "grad_norm": 0.7264239631357388, "learning_rate": 0.003, "loss": 4.0716, "step": 9462 }, { "epoch": 0.09463, "grad_norm": 0.6420461092891769, "learning_rate": 0.003, "loss": 4.0777, "step": 9463 }, { "epoch": 0.09464, "grad_norm": 0.6005295130137699, "learning_rate": 0.003, "loss": 4.0792, "step": 9464 }, { "epoch": 0.09465, "grad_norm": 0.7261508186379931, "learning_rate": 0.003, "loss": 4.0825, "step": 9465 }, { "epoch": 0.09466, "grad_norm": 0.7958128917859779, "learning_rate": 0.003, "loss": 4.0981, "step": 9466 }, { "epoch": 0.09467, "grad_norm": 0.7585869693775182, "learning_rate": 0.003, "loss": 4.0885, "step": 9467 }, { "epoch": 0.09468, "grad_norm": 0.7806606873704798, "learning_rate": 0.003, "loss": 4.0968, "step": 9468 }, { "epoch": 0.09469, "grad_norm": 0.9838410664741073, "learning_rate": 0.003, "loss": 4.0686, "step": 9469 }, { "epoch": 0.0947, "grad_norm": 1.1761299535165681, "learning_rate": 0.003, "loss": 4.0866, "step": 9470 }, { "epoch": 0.09471, "grad_norm": 1.0889544845009487, "learning_rate": 0.003, "loss": 4.1187, "step": 9471 }, { "epoch": 0.09472, "grad_norm": 1.0307683311785911, "learning_rate": 0.003, "loss": 4.0934, "step": 9472 }, { "epoch": 0.09473, "grad_norm": 0.9564223700183074, "learning_rate": 0.003, "loss": 4.0883, "step": 9473 }, { "epoch": 0.09474, "grad_norm": 0.9376809113489067, "learning_rate": 0.003, "loss": 4.0862, "step": 9474 }, { "epoch": 0.09475, "grad_norm": 0.9618524959475816, "learning_rate": 0.003, "loss": 4.0907, "step": 9475 }, { "epoch": 0.09476, "grad_norm": 0.9554715312019628, "learning_rate": 0.003, "loss": 4.0931, "step": 9476 }, { "epoch": 0.09477, "grad_norm": 1.000035475540716, "learning_rate": 0.003, "loss": 4.093, "step": 9477 }, { "epoch": 0.09478, "grad_norm": 1.0839210902354595, "learning_rate": 0.003, "loss": 4.0804, "step": 9478 }, { "epoch": 0.09479, "grad_norm": 0.869487169656702, "learning_rate": 0.003, "loss": 4.0948, "step": 9479 }, { "epoch": 0.0948, "grad_norm": 0.7926358942114886, "learning_rate": 0.003, "loss": 4.0791, "step": 9480 }, { "epoch": 0.09481, "grad_norm": 0.6884497755914634, "learning_rate": 0.003, "loss": 4.1179, "step": 9481 }, { "epoch": 0.09482, "grad_norm": 0.6845896021916732, "learning_rate": 0.003, "loss": 4.0771, "step": 9482 }, { "epoch": 0.09483, "grad_norm": 0.6566997735021866, "learning_rate": 0.003, "loss": 4.054, "step": 9483 }, { "epoch": 0.09484, "grad_norm": 0.616530773620674, "learning_rate": 0.003, "loss": 4.1012, "step": 9484 }, { "epoch": 0.09485, "grad_norm": 0.7642856486603846, "learning_rate": 0.003, "loss": 4.0842, "step": 9485 }, { "epoch": 0.09486, "grad_norm": 0.8665981874168233, "learning_rate": 0.003, "loss": 4.0973, "step": 9486 }, { "epoch": 0.09487, "grad_norm": 1.0909398291701577, "learning_rate": 0.003, "loss": 4.0915, "step": 9487 }, { "epoch": 0.09488, "grad_norm": 1.1491216476616155, "learning_rate": 0.003, "loss": 4.1348, "step": 9488 }, { "epoch": 0.09489, "grad_norm": 0.8354603918248583, "learning_rate": 0.003, "loss": 4.0975, "step": 9489 }, { "epoch": 0.0949, "grad_norm": 0.6895784339169947, "learning_rate": 0.003, "loss": 4.0892, "step": 9490 }, { "epoch": 0.09491, "grad_norm": 0.711524009729495, "learning_rate": 0.003, "loss": 4.072, "step": 9491 }, { "epoch": 0.09492, "grad_norm": 0.8394489207958683, "learning_rate": 0.003, "loss": 4.0846, "step": 9492 }, { "epoch": 0.09493, "grad_norm": 1.2414418185211027, "learning_rate": 0.003, "loss": 4.0673, "step": 9493 }, { "epoch": 0.09494, "grad_norm": 0.8710054135901388, "learning_rate": 0.003, "loss": 4.1035, "step": 9494 }, { "epoch": 0.09495, "grad_norm": 0.8645492616541143, "learning_rate": 0.003, "loss": 4.0936, "step": 9495 }, { "epoch": 0.09496, "grad_norm": 0.83427100248925, "learning_rate": 0.003, "loss": 4.1095, "step": 9496 }, { "epoch": 0.09497, "grad_norm": 0.7271594302410049, "learning_rate": 0.003, "loss": 4.0875, "step": 9497 }, { "epoch": 0.09498, "grad_norm": 0.641946567264643, "learning_rate": 0.003, "loss": 4.0641, "step": 9498 }, { "epoch": 0.09499, "grad_norm": 0.6170199490496553, "learning_rate": 0.003, "loss": 4.0707, "step": 9499 }, { "epoch": 0.095, "grad_norm": 0.6200424975500217, "learning_rate": 0.003, "loss": 4.1149, "step": 9500 }, { "epoch": 0.09501, "grad_norm": 0.809057988961176, "learning_rate": 0.003, "loss": 4.0824, "step": 9501 }, { "epoch": 0.09502, "grad_norm": 1.1020241879146306, "learning_rate": 0.003, "loss": 4.0865, "step": 9502 }, { "epoch": 0.09503, "grad_norm": 1.030306126051077, "learning_rate": 0.003, "loss": 4.049, "step": 9503 }, { "epoch": 0.09504, "grad_norm": 0.9735652853838207, "learning_rate": 0.003, "loss": 4.1009, "step": 9504 }, { "epoch": 0.09505, "grad_norm": 0.9236443544745729, "learning_rate": 0.003, "loss": 4.0903, "step": 9505 }, { "epoch": 0.09506, "grad_norm": 0.8592569358203569, "learning_rate": 0.003, "loss": 4.104, "step": 9506 }, { "epoch": 0.09507, "grad_norm": 0.9643353093501955, "learning_rate": 0.003, "loss": 4.116, "step": 9507 }, { "epoch": 0.09508, "grad_norm": 1.0137276574092238, "learning_rate": 0.003, "loss": 4.0914, "step": 9508 }, { "epoch": 0.09509, "grad_norm": 1.1660412011916976, "learning_rate": 0.003, "loss": 4.0934, "step": 9509 }, { "epoch": 0.0951, "grad_norm": 1.082728289928524, "learning_rate": 0.003, "loss": 4.0988, "step": 9510 }, { "epoch": 0.09511, "grad_norm": 0.9624008478957283, "learning_rate": 0.003, "loss": 4.0849, "step": 9511 }, { "epoch": 0.09512, "grad_norm": 1.0923795540142767, "learning_rate": 0.003, "loss": 4.0925, "step": 9512 }, { "epoch": 0.09513, "grad_norm": 1.0328382351919922, "learning_rate": 0.003, "loss": 4.0803, "step": 9513 }, { "epoch": 0.09514, "grad_norm": 1.0089078992511966, "learning_rate": 0.003, "loss": 4.0887, "step": 9514 }, { "epoch": 0.09515, "grad_norm": 0.9704873672647005, "learning_rate": 0.003, "loss": 4.1029, "step": 9515 }, { "epoch": 0.09516, "grad_norm": 0.8985075408487389, "learning_rate": 0.003, "loss": 4.0934, "step": 9516 }, { "epoch": 0.09517, "grad_norm": 0.9130742158343249, "learning_rate": 0.003, "loss": 4.0927, "step": 9517 }, { "epoch": 0.09518, "grad_norm": 0.9244616783687439, "learning_rate": 0.003, "loss": 4.1091, "step": 9518 }, { "epoch": 0.09519, "grad_norm": 0.9668574431326041, "learning_rate": 0.003, "loss": 4.1233, "step": 9519 }, { "epoch": 0.0952, "grad_norm": 0.9637024791097162, "learning_rate": 0.003, "loss": 4.0695, "step": 9520 }, { "epoch": 0.09521, "grad_norm": 1.016108766606305, "learning_rate": 0.003, "loss": 4.0803, "step": 9521 }, { "epoch": 0.09522, "grad_norm": 1.0754104899740695, "learning_rate": 0.003, "loss": 4.0894, "step": 9522 }, { "epoch": 0.09523, "grad_norm": 0.9460439865375799, "learning_rate": 0.003, "loss": 4.0892, "step": 9523 }, { "epoch": 0.09524, "grad_norm": 0.8972736244759736, "learning_rate": 0.003, "loss": 4.0838, "step": 9524 }, { "epoch": 0.09525, "grad_norm": 0.8707423847198635, "learning_rate": 0.003, "loss": 4.0978, "step": 9525 }, { "epoch": 0.09526, "grad_norm": 0.9298574511865992, "learning_rate": 0.003, "loss": 4.0934, "step": 9526 }, { "epoch": 0.09527, "grad_norm": 0.873097150552636, "learning_rate": 0.003, "loss": 4.0942, "step": 9527 }, { "epoch": 0.09528, "grad_norm": 0.861692752888038, "learning_rate": 0.003, "loss": 4.1144, "step": 9528 }, { "epoch": 0.09529, "grad_norm": 0.8130110024800541, "learning_rate": 0.003, "loss": 4.0684, "step": 9529 }, { "epoch": 0.0953, "grad_norm": 0.7905443102217827, "learning_rate": 0.003, "loss": 4.0911, "step": 9530 }, { "epoch": 0.09531, "grad_norm": 0.8291019771418112, "learning_rate": 0.003, "loss": 4.0808, "step": 9531 }, { "epoch": 0.09532, "grad_norm": 0.8459154353642524, "learning_rate": 0.003, "loss": 4.0743, "step": 9532 }, { "epoch": 0.09533, "grad_norm": 0.9922895216920463, "learning_rate": 0.003, "loss": 4.0721, "step": 9533 }, { "epoch": 0.09534, "grad_norm": 1.185301105499309, "learning_rate": 0.003, "loss": 4.0837, "step": 9534 }, { "epoch": 0.09535, "grad_norm": 1.0236593208473932, "learning_rate": 0.003, "loss": 4.0911, "step": 9535 }, { "epoch": 0.09536, "grad_norm": 1.1070204390625342, "learning_rate": 0.003, "loss": 4.1128, "step": 9536 }, { "epoch": 0.09537, "grad_norm": 0.9574983262523417, "learning_rate": 0.003, "loss": 4.1175, "step": 9537 }, { "epoch": 0.09538, "grad_norm": 0.8493144641874563, "learning_rate": 0.003, "loss": 4.096, "step": 9538 }, { "epoch": 0.09539, "grad_norm": 0.835531151551209, "learning_rate": 0.003, "loss": 4.1131, "step": 9539 }, { "epoch": 0.0954, "grad_norm": 0.8283289116606958, "learning_rate": 0.003, "loss": 4.0738, "step": 9540 }, { "epoch": 0.09541, "grad_norm": 0.9341466910775755, "learning_rate": 0.003, "loss": 4.1036, "step": 9541 }, { "epoch": 0.09542, "grad_norm": 1.0314622632534094, "learning_rate": 0.003, "loss": 4.0792, "step": 9542 }, { "epoch": 0.09543, "grad_norm": 1.0142586101903608, "learning_rate": 0.003, "loss": 4.0876, "step": 9543 }, { "epoch": 0.09544, "grad_norm": 1.4582749640876167, "learning_rate": 0.003, "loss": 4.0953, "step": 9544 }, { "epoch": 0.09545, "grad_norm": 0.6203000789802795, "learning_rate": 0.003, "loss": 4.0932, "step": 9545 }, { "epoch": 0.09546, "grad_norm": 0.7933224015182839, "learning_rate": 0.003, "loss": 4.1106, "step": 9546 }, { "epoch": 0.09547, "grad_norm": 0.7738375177165676, "learning_rate": 0.003, "loss": 4.0975, "step": 9547 }, { "epoch": 0.09548, "grad_norm": 0.8381207027995874, "learning_rate": 0.003, "loss": 4.0915, "step": 9548 }, { "epoch": 0.09549, "grad_norm": 0.8454264400374799, "learning_rate": 0.003, "loss": 4.1284, "step": 9549 }, { "epoch": 0.0955, "grad_norm": 0.8076905957792103, "learning_rate": 0.003, "loss": 4.0658, "step": 9550 }, { "epoch": 0.09551, "grad_norm": 0.8632802824577473, "learning_rate": 0.003, "loss": 4.0762, "step": 9551 }, { "epoch": 0.09552, "grad_norm": 1.0768644429113419, "learning_rate": 0.003, "loss": 4.1435, "step": 9552 }, { "epoch": 0.09553, "grad_norm": 1.1900364276456132, "learning_rate": 0.003, "loss": 4.1021, "step": 9553 }, { "epoch": 0.09554, "grad_norm": 0.6910437093615704, "learning_rate": 0.003, "loss": 4.0656, "step": 9554 }, { "epoch": 0.09555, "grad_norm": 0.677559408017465, "learning_rate": 0.003, "loss": 4.0712, "step": 9555 }, { "epoch": 0.09556, "grad_norm": 0.8863789977775014, "learning_rate": 0.003, "loss": 4.0847, "step": 9556 }, { "epoch": 0.09557, "grad_norm": 1.2130858830229994, "learning_rate": 0.003, "loss": 4.1126, "step": 9557 }, { "epoch": 0.09558, "grad_norm": 0.8078845755680898, "learning_rate": 0.003, "loss": 4.0944, "step": 9558 }, { "epoch": 0.09559, "grad_norm": 0.6888474933216685, "learning_rate": 0.003, "loss": 4.1144, "step": 9559 }, { "epoch": 0.0956, "grad_norm": 0.7444419985119464, "learning_rate": 0.003, "loss": 4.0786, "step": 9560 }, { "epoch": 0.09561, "grad_norm": 0.7331440318600505, "learning_rate": 0.003, "loss": 4.1056, "step": 9561 }, { "epoch": 0.09562, "grad_norm": 0.6745064106478187, "learning_rate": 0.003, "loss": 4.0546, "step": 9562 }, { "epoch": 0.09563, "grad_norm": 0.7223110614602103, "learning_rate": 0.003, "loss": 4.1217, "step": 9563 }, { "epoch": 0.09564, "grad_norm": 0.7954354073035838, "learning_rate": 0.003, "loss": 4.1098, "step": 9564 }, { "epoch": 0.09565, "grad_norm": 0.9518848575929585, "learning_rate": 0.003, "loss": 4.0799, "step": 9565 }, { "epoch": 0.09566, "grad_norm": 1.1360554053001062, "learning_rate": 0.003, "loss": 4.1018, "step": 9566 }, { "epoch": 0.09567, "grad_norm": 1.0190304984027616, "learning_rate": 0.003, "loss": 4.0982, "step": 9567 }, { "epoch": 0.09568, "grad_norm": 1.1111115745846918, "learning_rate": 0.003, "loss": 4.1073, "step": 9568 }, { "epoch": 0.09569, "grad_norm": 1.060656585628691, "learning_rate": 0.003, "loss": 4.1115, "step": 9569 }, { "epoch": 0.0957, "grad_norm": 0.9102915228109848, "learning_rate": 0.003, "loss": 4.1046, "step": 9570 }, { "epoch": 0.09571, "grad_norm": 1.018964046699271, "learning_rate": 0.003, "loss": 4.1063, "step": 9571 }, { "epoch": 0.09572, "grad_norm": 1.2527068883296468, "learning_rate": 0.003, "loss": 4.1042, "step": 9572 }, { "epoch": 0.09573, "grad_norm": 0.9851106487089002, "learning_rate": 0.003, "loss": 4.1041, "step": 9573 }, { "epoch": 0.09574, "grad_norm": 1.0626664552153582, "learning_rate": 0.003, "loss": 4.1093, "step": 9574 }, { "epoch": 0.09575, "grad_norm": 1.1535018572622826, "learning_rate": 0.003, "loss": 4.0924, "step": 9575 }, { "epoch": 0.09576, "grad_norm": 0.866406401028726, "learning_rate": 0.003, "loss": 4.0844, "step": 9576 }, { "epoch": 0.09577, "grad_norm": 0.7598448539401876, "learning_rate": 0.003, "loss": 4.0722, "step": 9577 }, { "epoch": 0.09578, "grad_norm": 0.76846069859649, "learning_rate": 0.003, "loss": 4.0693, "step": 9578 }, { "epoch": 0.09579, "grad_norm": 0.8043603561549131, "learning_rate": 0.003, "loss": 4.0766, "step": 9579 }, { "epoch": 0.0958, "grad_norm": 0.8020399563357902, "learning_rate": 0.003, "loss": 4.1078, "step": 9580 }, { "epoch": 0.09581, "grad_norm": 0.7682141048439647, "learning_rate": 0.003, "loss": 4.1397, "step": 9581 }, { "epoch": 0.09582, "grad_norm": 0.8015394924731275, "learning_rate": 0.003, "loss": 4.0847, "step": 9582 }, { "epoch": 0.09583, "grad_norm": 0.8226749457094206, "learning_rate": 0.003, "loss": 4.0771, "step": 9583 }, { "epoch": 0.09584, "grad_norm": 0.94933199587187, "learning_rate": 0.003, "loss": 4.1012, "step": 9584 }, { "epoch": 0.09585, "grad_norm": 0.9918679044193051, "learning_rate": 0.003, "loss": 4.0839, "step": 9585 }, { "epoch": 0.09586, "grad_norm": 0.9705822187192004, "learning_rate": 0.003, "loss": 4.1163, "step": 9586 }, { "epoch": 0.09587, "grad_norm": 0.9451328209074761, "learning_rate": 0.003, "loss": 4.0607, "step": 9587 }, { "epoch": 0.09588, "grad_norm": 1.2495169152311112, "learning_rate": 0.003, "loss": 4.1044, "step": 9588 }, { "epoch": 0.09589, "grad_norm": 0.8554058248225164, "learning_rate": 0.003, "loss": 4.0993, "step": 9589 }, { "epoch": 0.0959, "grad_norm": 0.8694247907519619, "learning_rate": 0.003, "loss": 4.1135, "step": 9590 }, { "epoch": 0.09591, "grad_norm": 0.8764253050367747, "learning_rate": 0.003, "loss": 4.1419, "step": 9591 }, { "epoch": 0.09592, "grad_norm": 0.928129824027457, "learning_rate": 0.003, "loss": 4.1067, "step": 9592 }, { "epoch": 0.09593, "grad_norm": 0.9505672638184878, "learning_rate": 0.003, "loss": 4.1212, "step": 9593 }, { "epoch": 0.09594, "grad_norm": 1.0714724754192935, "learning_rate": 0.003, "loss": 4.087, "step": 9594 }, { "epoch": 0.09595, "grad_norm": 0.9819378603136486, "learning_rate": 0.003, "loss": 4.0884, "step": 9595 }, { "epoch": 0.09596, "grad_norm": 1.026865992142624, "learning_rate": 0.003, "loss": 4.0921, "step": 9596 }, { "epoch": 0.09597, "grad_norm": 0.973648595397517, "learning_rate": 0.003, "loss": 4.0922, "step": 9597 }, { "epoch": 0.09598, "grad_norm": 0.9850517417503974, "learning_rate": 0.003, "loss": 4.1032, "step": 9598 }, { "epoch": 0.09599, "grad_norm": 0.9928584309344902, "learning_rate": 0.003, "loss": 4.0832, "step": 9599 }, { "epoch": 0.096, "grad_norm": 0.9952109010549817, "learning_rate": 0.003, "loss": 4.0908, "step": 9600 }, { "epoch": 0.09601, "grad_norm": 0.9715688246997389, "learning_rate": 0.003, "loss": 4.0956, "step": 9601 }, { "epoch": 0.09602, "grad_norm": 0.896038866298406, "learning_rate": 0.003, "loss": 4.1225, "step": 9602 }, { "epoch": 0.09603, "grad_norm": 0.9394621805775981, "learning_rate": 0.003, "loss": 4.1036, "step": 9603 }, { "epoch": 0.09604, "grad_norm": 0.8832102278330413, "learning_rate": 0.003, "loss": 4.0637, "step": 9604 }, { "epoch": 0.09605, "grad_norm": 0.8768838219761711, "learning_rate": 0.003, "loss": 4.1059, "step": 9605 }, { "epoch": 0.09606, "grad_norm": 0.8777392831472427, "learning_rate": 0.003, "loss": 4.103, "step": 9606 }, { "epoch": 0.09607, "grad_norm": 1.076257004258557, "learning_rate": 0.003, "loss": 4.1166, "step": 9607 }, { "epoch": 0.09608, "grad_norm": 0.9490087215698528, "learning_rate": 0.003, "loss": 4.1224, "step": 9608 }, { "epoch": 0.09609, "grad_norm": 0.8957046130890897, "learning_rate": 0.003, "loss": 4.0976, "step": 9609 }, { "epoch": 0.0961, "grad_norm": 0.7984430379991309, "learning_rate": 0.003, "loss": 4.0956, "step": 9610 }, { "epoch": 0.09611, "grad_norm": 0.7499563857935081, "learning_rate": 0.003, "loss": 4.0895, "step": 9611 }, { "epoch": 0.09612, "grad_norm": 0.7348626128667639, "learning_rate": 0.003, "loss": 4.0906, "step": 9612 }, { "epoch": 0.09613, "grad_norm": 0.7649035088697457, "learning_rate": 0.003, "loss": 4.104, "step": 9613 }, { "epoch": 0.09614, "grad_norm": 0.8970754465663441, "learning_rate": 0.003, "loss": 4.0959, "step": 9614 }, { "epoch": 0.09615, "grad_norm": 1.0353388858633117, "learning_rate": 0.003, "loss": 4.0739, "step": 9615 }, { "epoch": 0.09616, "grad_norm": 1.0422928251395278, "learning_rate": 0.003, "loss": 4.103, "step": 9616 }, { "epoch": 0.09617, "grad_norm": 0.9330622686075548, "learning_rate": 0.003, "loss": 4.0772, "step": 9617 }, { "epoch": 0.09618, "grad_norm": 0.8079910426137322, "learning_rate": 0.003, "loss": 4.096, "step": 9618 }, { "epoch": 0.09619, "grad_norm": 0.7658297024476686, "learning_rate": 0.003, "loss": 4.0891, "step": 9619 }, { "epoch": 0.0962, "grad_norm": 0.7549136355891398, "learning_rate": 0.003, "loss": 4.078, "step": 9620 }, { "epoch": 0.09621, "grad_norm": 0.7392852138332249, "learning_rate": 0.003, "loss": 4.0955, "step": 9621 }, { "epoch": 0.09622, "grad_norm": 0.8832584494387006, "learning_rate": 0.003, "loss": 4.082, "step": 9622 }, { "epoch": 0.09623, "grad_norm": 1.0636469145202627, "learning_rate": 0.003, "loss": 4.0865, "step": 9623 }, { "epoch": 0.09624, "grad_norm": 1.0064354117819914, "learning_rate": 0.003, "loss": 4.0983, "step": 9624 }, { "epoch": 0.09625, "grad_norm": 0.9371314262078005, "learning_rate": 0.003, "loss": 4.0757, "step": 9625 }, { "epoch": 0.09626, "grad_norm": 0.8253170716175934, "learning_rate": 0.003, "loss": 4.0815, "step": 9626 }, { "epoch": 0.09627, "grad_norm": 0.7030524329715849, "learning_rate": 0.003, "loss": 4.0746, "step": 9627 }, { "epoch": 0.09628, "grad_norm": 0.7250410432973622, "learning_rate": 0.003, "loss": 4.0814, "step": 9628 }, { "epoch": 0.09629, "grad_norm": 0.7346067220792749, "learning_rate": 0.003, "loss": 4.097, "step": 9629 }, { "epoch": 0.0963, "grad_norm": 0.9046057127262703, "learning_rate": 0.003, "loss": 4.0897, "step": 9630 }, { "epoch": 0.09631, "grad_norm": 0.894854230452782, "learning_rate": 0.003, "loss": 4.1255, "step": 9631 }, { "epoch": 0.09632, "grad_norm": 0.9480273147630979, "learning_rate": 0.003, "loss": 4.0613, "step": 9632 }, { "epoch": 0.09633, "grad_norm": 0.9786150037379322, "learning_rate": 0.003, "loss": 4.0802, "step": 9633 }, { "epoch": 0.09634, "grad_norm": 1.0282461128507772, "learning_rate": 0.003, "loss": 4.067, "step": 9634 }, { "epoch": 0.09635, "grad_norm": 1.2837836327015333, "learning_rate": 0.003, "loss": 4.115, "step": 9635 }, { "epoch": 0.09636, "grad_norm": 1.0371256794904447, "learning_rate": 0.003, "loss": 4.1127, "step": 9636 }, { "epoch": 0.09637, "grad_norm": 0.9109444824213261, "learning_rate": 0.003, "loss": 4.0815, "step": 9637 }, { "epoch": 0.09638, "grad_norm": 0.8349568525937817, "learning_rate": 0.003, "loss": 4.0746, "step": 9638 }, { "epoch": 0.09639, "grad_norm": 0.8952363870351221, "learning_rate": 0.003, "loss": 4.0831, "step": 9639 }, { "epoch": 0.0964, "grad_norm": 0.8817057896870121, "learning_rate": 0.003, "loss": 4.1278, "step": 9640 }, { "epoch": 0.09641, "grad_norm": 1.0165123777415066, "learning_rate": 0.003, "loss": 4.0679, "step": 9641 }, { "epoch": 0.09642, "grad_norm": 1.2088791393181662, "learning_rate": 0.003, "loss": 4.1269, "step": 9642 }, { "epoch": 0.09643, "grad_norm": 0.8298242740488613, "learning_rate": 0.003, "loss": 4.089, "step": 9643 }, { "epoch": 0.09644, "grad_norm": 0.7360761912273628, "learning_rate": 0.003, "loss": 4.1078, "step": 9644 }, { "epoch": 0.09645, "grad_norm": 0.7427508177687894, "learning_rate": 0.003, "loss": 4.0972, "step": 9645 }, { "epoch": 0.09646, "grad_norm": 0.8169170909806851, "learning_rate": 0.003, "loss": 4.0957, "step": 9646 }, { "epoch": 0.09647, "grad_norm": 0.873096623162937, "learning_rate": 0.003, "loss": 4.0556, "step": 9647 }, { "epoch": 0.09648, "grad_norm": 0.9301514895225974, "learning_rate": 0.003, "loss": 4.1311, "step": 9648 }, { "epoch": 0.09649, "grad_norm": 1.0261303129344521, "learning_rate": 0.003, "loss": 4.0698, "step": 9649 }, { "epoch": 0.0965, "grad_norm": 0.9858489832150986, "learning_rate": 0.003, "loss": 4.0794, "step": 9650 }, { "epoch": 0.09651, "grad_norm": 0.9207520107530078, "learning_rate": 0.003, "loss": 4.0982, "step": 9651 }, { "epoch": 0.09652, "grad_norm": 0.8934066228016666, "learning_rate": 0.003, "loss": 4.0845, "step": 9652 }, { "epoch": 0.09653, "grad_norm": 0.8604206377048786, "learning_rate": 0.003, "loss": 4.1113, "step": 9653 }, { "epoch": 0.09654, "grad_norm": 0.9663619779941607, "learning_rate": 0.003, "loss": 4.0642, "step": 9654 }, { "epoch": 0.09655, "grad_norm": 0.91146406489948, "learning_rate": 0.003, "loss": 4.1002, "step": 9655 }, { "epoch": 0.09656, "grad_norm": 0.8975020800835469, "learning_rate": 0.003, "loss": 4.1102, "step": 9656 }, { "epoch": 0.09657, "grad_norm": 1.0608421766317369, "learning_rate": 0.003, "loss": 4.1116, "step": 9657 }, { "epoch": 0.09658, "grad_norm": 0.9751782896772202, "learning_rate": 0.003, "loss": 4.0861, "step": 9658 }, { "epoch": 0.09659, "grad_norm": 0.8507331684637727, "learning_rate": 0.003, "loss": 4.1144, "step": 9659 }, { "epoch": 0.0966, "grad_norm": 0.7797760426559466, "learning_rate": 0.003, "loss": 4.1066, "step": 9660 }, { "epoch": 0.09661, "grad_norm": 0.7280346537301909, "learning_rate": 0.003, "loss": 4.0727, "step": 9661 }, { "epoch": 0.09662, "grad_norm": 0.7687521057717993, "learning_rate": 0.003, "loss": 4.078, "step": 9662 }, { "epoch": 0.09663, "grad_norm": 0.8765165791530211, "learning_rate": 0.003, "loss": 4.1116, "step": 9663 }, { "epoch": 0.09664, "grad_norm": 1.213269447787462, "learning_rate": 0.003, "loss": 4.111, "step": 9664 }, { "epoch": 0.09665, "grad_norm": 0.9511980015499266, "learning_rate": 0.003, "loss": 4.0851, "step": 9665 }, { "epoch": 0.09666, "grad_norm": 0.8932643356990735, "learning_rate": 0.003, "loss": 4.1002, "step": 9666 }, { "epoch": 0.09667, "grad_norm": 1.1991501443292183, "learning_rate": 0.003, "loss": 4.1147, "step": 9667 }, { "epoch": 0.09668, "grad_norm": 0.9122924207412219, "learning_rate": 0.003, "loss": 4.0836, "step": 9668 }, { "epoch": 0.09669, "grad_norm": 0.8177666074816453, "learning_rate": 0.003, "loss": 4.0861, "step": 9669 }, { "epoch": 0.0967, "grad_norm": 0.8642756088338996, "learning_rate": 0.003, "loss": 4.1086, "step": 9670 }, { "epoch": 0.09671, "grad_norm": 0.9387142926253513, "learning_rate": 0.003, "loss": 4.0866, "step": 9671 }, { "epoch": 0.09672, "grad_norm": 0.9876148943907853, "learning_rate": 0.003, "loss": 4.1201, "step": 9672 }, { "epoch": 0.09673, "grad_norm": 1.183065157513859, "learning_rate": 0.003, "loss": 4.1216, "step": 9673 }, { "epoch": 0.09674, "grad_norm": 0.8981167917963628, "learning_rate": 0.003, "loss": 4.1436, "step": 9674 }, { "epoch": 0.09675, "grad_norm": 0.7908901876516276, "learning_rate": 0.003, "loss": 4.077, "step": 9675 }, { "epoch": 0.09676, "grad_norm": 0.7836781101706645, "learning_rate": 0.003, "loss": 4.0989, "step": 9676 }, { "epoch": 0.09677, "grad_norm": 0.9396215534952582, "learning_rate": 0.003, "loss": 4.0623, "step": 9677 }, { "epoch": 0.09678, "grad_norm": 1.097205685443494, "learning_rate": 0.003, "loss": 4.1384, "step": 9678 }, { "epoch": 0.09679, "grad_norm": 0.9990488663831774, "learning_rate": 0.003, "loss": 4.1153, "step": 9679 }, { "epoch": 0.0968, "grad_norm": 0.9171443826425181, "learning_rate": 0.003, "loss": 4.1057, "step": 9680 }, { "epoch": 0.09681, "grad_norm": 1.009511628313745, "learning_rate": 0.003, "loss": 4.1129, "step": 9681 }, { "epoch": 0.09682, "grad_norm": 1.2431943193297423, "learning_rate": 0.003, "loss": 4.1022, "step": 9682 }, { "epoch": 0.09683, "grad_norm": 0.9350989569045836, "learning_rate": 0.003, "loss": 4.0919, "step": 9683 }, { "epoch": 0.09684, "grad_norm": 0.9827551754050654, "learning_rate": 0.003, "loss": 4.1214, "step": 9684 }, { "epoch": 0.09685, "grad_norm": 1.0518093494350942, "learning_rate": 0.003, "loss": 4.1023, "step": 9685 }, { "epoch": 0.09686, "grad_norm": 1.1163299364855512, "learning_rate": 0.003, "loss": 4.1101, "step": 9686 }, { "epoch": 0.09687, "grad_norm": 0.8195919781179394, "learning_rate": 0.003, "loss": 4.105, "step": 9687 }, { "epoch": 0.09688, "grad_norm": 0.8190869432151955, "learning_rate": 0.003, "loss": 4.1337, "step": 9688 }, { "epoch": 0.09689, "grad_norm": 0.916773205951842, "learning_rate": 0.003, "loss": 4.0902, "step": 9689 }, { "epoch": 0.0969, "grad_norm": 0.8066088249172502, "learning_rate": 0.003, "loss": 4.1004, "step": 9690 }, { "epoch": 0.09691, "grad_norm": 0.7401326149708389, "learning_rate": 0.003, "loss": 4.108, "step": 9691 }, { "epoch": 0.09692, "grad_norm": 0.6424151800103749, "learning_rate": 0.003, "loss": 4.1052, "step": 9692 }, { "epoch": 0.09693, "grad_norm": 0.6299822425823625, "learning_rate": 0.003, "loss": 4.0656, "step": 9693 }, { "epoch": 0.09694, "grad_norm": 0.6362006844765989, "learning_rate": 0.003, "loss": 4.0846, "step": 9694 }, { "epoch": 0.09695, "grad_norm": 0.6132233963780619, "learning_rate": 0.003, "loss": 4.0906, "step": 9695 }, { "epoch": 0.09696, "grad_norm": 0.6660368458929525, "learning_rate": 0.003, "loss": 4.0754, "step": 9696 }, { "epoch": 0.09697, "grad_norm": 0.7544139461670122, "learning_rate": 0.003, "loss": 4.0888, "step": 9697 }, { "epoch": 0.09698, "grad_norm": 0.8497357368030164, "learning_rate": 0.003, "loss": 4.1028, "step": 9698 }, { "epoch": 0.09699, "grad_norm": 0.9145155762891021, "learning_rate": 0.003, "loss": 4.0742, "step": 9699 }, { "epoch": 0.097, "grad_norm": 0.8975185216089709, "learning_rate": 0.003, "loss": 4.1153, "step": 9700 }, { "epoch": 0.09701, "grad_norm": 0.9835477897845034, "learning_rate": 0.003, "loss": 4.0894, "step": 9701 }, { "epoch": 0.09702, "grad_norm": 1.2588913749038257, "learning_rate": 0.003, "loss": 4.0771, "step": 9702 }, { "epoch": 0.09703, "grad_norm": 1.231889122377567, "learning_rate": 0.003, "loss": 4.0918, "step": 9703 }, { "epoch": 0.09704, "grad_norm": 0.9026679969753263, "learning_rate": 0.003, "loss": 4.0894, "step": 9704 }, { "epoch": 0.09705, "grad_norm": 0.917096876663794, "learning_rate": 0.003, "loss": 4.08, "step": 9705 }, { "epoch": 0.09706, "grad_norm": 1.1210979563105794, "learning_rate": 0.003, "loss": 4.1014, "step": 9706 }, { "epoch": 0.09707, "grad_norm": 1.046684770518191, "learning_rate": 0.003, "loss": 4.0628, "step": 9707 }, { "epoch": 0.09708, "grad_norm": 0.8586665214097383, "learning_rate": 0.003, "loss": 4.0587, "step": 9708 }, { "epoch": 0.09709, "grad_norm": 0.8399659702694784, "learning_rate": 0.003, "loss": 4.0948, "step": 9709 }, { "epoch": 0.0971, "grad_norm": 0.9061619367314612, "learning_rate": 0.003, "loss": 4.0863, "step": 9710 }, { "epoch": 0.09711, "grad_norm": 0.869420827619381, "learning_rate": 0.003, "loss": 4.0923, "step": 9711 }, { "epoch": 0.09712, "grad_norm": 0.9484632391522392, "learning_rate": 0.003, "loss": 4.1323, "step": 9712 }, { "epoch": 0.09713, "grad_norm": 0.9344386880847007, "learning_rate": 0.003, "loss": 4.0562, "step": 9713 }, { "epoch": 0.09714, "grad_norm": 0.8191433536044963, "learning_rate": 0.003, "loss": 4.0745, "step": 9714 }, { "epoch": 0.09715, "grad_norm": 0.9260497381898702, "learning_rate": 0.003, "loss": 4.0997, "step": 9715 }, { "epoch": 0.09716, "grad_norm": 1.2168835302778682, "learning_rate": 0.003, "loss": 4.0727, "step": 9716 }, { "epoch": 0.09717, "grad_norm": 1.1755192129415277, "learning_rate": 0.003, "loss": 4.125, "step": 9717 }, { "epoch": 0.09718, "grad_norm": 0.8578207270174794, "learning_rate": 0.003, "loss": 4.0891, "step": 9718 }, { "epoch": 0.09719, "grad_norm": 0.7982551490113904, "learning_rate": 0.003, "loss": 4.079, "step": 9719 }, { "epoch": 0.0972, "grad_norm": 0.8181582924013971, "learning_rate": 0.003, "loss": 4.0776, "step": 9720 }, { "epoch": 0.09721, "grad_norm": 0.7803993304194405, "learning_rate": 0.003, "loss": 4.1149, "step": 9721 }, { "epoch": 0.09722, "grad_norm": 0.7217245724741701, "learning_rate": 0.003, "loss": 4.0896, "step": 9722 }, { "epoch": 0.09723, "grad_norm": 0.8297726003767051, "learning_rate": 0.003, "loss": 4.0572, "step": 9723 }, { "epoch": 0.09724, "grad_norm": 0.8385216713500718, "learning_rate": 0.003, "loss": 4.0784, "step": 9724 }, { "epoch": 0.09725, "grad_norm": 0.8535894035344416, "learning_rate": 0.003, "loss": 4.0405, "step": 9725 }, { "epoch": 0.09726, "grad_norm": 0.9997627703805871, "learning_rate": 0.003, "loss": 4.1014, "step": 9726 }, { "epoch": 0.09727, "grad_norm": 1.2871027951432406, "learning_rate": 0.003, "loss": 4.1, "step": 9727 }, { "epoch": 0.09728, "grad_norm": 0.9786561845767965, "learning_rate": 0.003, "loss": 4.0836, "step": 9728 }, { "epoch": 0.09729, "grad_norm": 1.056518217713997, "learning_rate": 0.003, "loss": 4.0921, "step": 9729 }, { "epoch": 0.0973, "grad_norm": 1.0746377651628496, "learning_rate": 0.003, "loss": 4.1021, "step": 9730 }, { "epoch": 0.09731, "grad_norm": 1.1074902816443135, "learning_rate": 0.003, "loss": 4.057, "step": 9731 }, { "epoch": 0.09732, "grad_norm": 0.9021480346313361, "learning_rate": 0.003, "loss": 4.0933, "step": 9732 }, { "epoch": 0.09733, "grad_norm": 0.8698953887525058, "learning_rate": 0.003, "loss": 4.0854, "step": 9733 }, { "epoch": 0.09734, "grad_norm": 0.7657365794014724, "learning_rate": 0.003, "loss": 4.1132, "step": 9734 }, { "epoch": 0.09735, "grad_norm": 0.7136532065218357, "learning_rate": 0.003, "loss": 4.1063, "step": 9735 }, { "epoch": 0.09736, "grad_norm": 0.6717357230030461, "learning_rate": 0.003, "loss": 4.0819, "step": 9736 }, { "epoch": 0.09737, "grad_norm": 0.5638315775801472, "learning_rate": 0.003, "loss": 4.0353, "step": 9737 }, { "epoch": 0.09738, "grad_norm": 0.6110116723814848, "learning_rate": 0.003, "loss": 4.0982, "step": 9738 }, { "epoch": 0.09739, "grad_norm": 0.6086506139891856, "learning_rate": 0.003, "loss": 4.0946, "step": 9739 }, { "epoch": 0.0974, "grad_norm": 0.7935050152052187, "learning_rate": 0.003, "loss": 4.0868, "step": 9740 }, { "epoch": 0.09741, "grad_norm": 1.1123363599125007, "learning_rate": 0.003, "loss": 4.1375, "step": 9741 }, { "epoch": 0.09742, "grad_norm": 1.0030554366325666, "learning_rate": 0.003, "loss": 4.076, "step": 9742 }, { "epoch": 0.09743, "grad_norm": 0.8267397016080698, "learning_rate": 0.003, "loss": 4.0996, "step": 9743 }, { "epoch": 0.09744, "grad_norm": 0.7717073469243041, "learning_rate": 0.003, "loss": 4.0603, "step": 9744 }, { "epoch": 0.09745, "grad_norm": 0.798472215693979, "learning_rate": 0.003, "loss": 4.1156, "step": 9745 }, { "epoch": 0.09746, "grad_norm": 0.9589191423737808, "learning_rate": 0.003, "loss": 4.0763, "step": 9746 }, { "epoch": 0.09747, "grad_norm": 1.116733109628546, "learning_rate": 0.003, "loss": 4.0924, "step": 9747 }, { "epoch": 0.09748, "grad_norm": 0.8375628533401334, "learning_rate": 0.003, "loss": 4.119, "step": 9748 }, { "epoch": 0.09749, "grad_norm": 0.8553001862226853, "learning_rate": 0.003, "loss": 4.0624, "step": 9749 }, { "epoch": 0.0975, "grad_norm": 0.9088453607552779, "learning_rate": 0.003, "loss": 4.0738, "step": 9750 }, { "epoch": 0.09751, "grad_norm": 0.8569486483584623, "learning_rate": 0.003, "loss": 4.0489, "step": 9751 }, { "epoch": 0.09752, "grad_norm": 0.7380045417599184, "learning_rate": 0.003, "loss": 4.0938, "step": 9752 }, { "epoch": 0.09753, "grad_norm": 0.8851823127379947, "learning_rate": 0.003, "loss": 4.0483, "step": 9753 }, { "epoch": 0.09754, "grad_norm": 1.0358214518222049, "learning_rate": 0.003, "loss": 4.102, "step": 9754 }, { "epoch": 0.09755, "grad_norm": 0.9439521181468254, "learning_rate": 0.003, "loss": 4.0739, "step": 9755 }, { "epoch": 0.09756, "grad_norm": 0.8198513848838082, "learning_rate": 0.003, "loss": 4.0739, "step": 9756 }, { "epoch": 0.09757, "grad_norm": 0.829860888973659, "learning_rate": 0.003, "loss": 4.0867, "step": 9757 }, { "epoch": 0.09758, "grad_norm": 0.9561785595312966, "learning_rate": 0.003, "loss": 4.0857, "step": 9758 }, { "epoch": 0.09759, "grad_norm": 1.2593905860698673, "learning_rate": 0.003, "loss": 4.0945, "step": 9759 }, { "epoch": 0.0976, "grad_norm": 1.046642575447866, "learning_rate": 0.003, "loss": 4.0956, "step": 9760 }, { "epoch": 0.09761, "grad_norm": 1.207472022721697, "learning_rate": 0.003, "loss": 4.0947, "step": 9761 }, { "epoch": 0.09762, "grad_norm": 0.9869421396722704, "learning_rate": 0.003, "loss": 4.1097, "step": 9762 }, { "epoch": 0.09763, "grad_norm": 1.0380173466462297, "learning_rate": 0.003, "loss": 4.0664, "step": 9763 }, { "epoch": 0.09764, "grad_norm": 1.1529962238971903, "learning_rate": 0.003, "loss": 4.0723, "step": 9764 }, { "epoch": 0.09765, "grad_norm": 0.8259926007525762, "learning_rate": 0.003, "loss": 4.1206, "step": 9765 }, { "epoch": 0.09766, "grad_norm": 0.8887761626705335, "learning_rate": 0.003, "loss": 4.0958, "step": 9766 }, { "epoch": 0.09767, "grad_norm": 0.8128663843712138, "learning_rate": 0.003, "loss": 4.1044, "step": 9767 }, { "epoch": 0.09768, "grad_norm": 0.7660324180116674, "learning_rate": 0.003, "loss": 4.0737, "step": 9768 }, { "epoch": 0.09769, "grad_norm": 0.7219697561948825, "learning_rate": 0.003, "loss": 4.1111, "step": 9769 }, { "epoch": 0.0977, "grad_norm": 0.7328719015179833, "learning_rate": 0.003, "loss": 4.1082, "step": 9770 }, { "epoch": 0.09771, "grad_norm": 0.6934477189530808, "learning_rate": 0.003, "loss": 4.0761, "step": 9771 }, { "epoch": 0.09772, "grad_norm": 0.8226483433677332, "learning_rate": 0.003, "loss": 4.073, "step": 9772 }, { "epoch": 0.09773, "grad_norm": 1.0407467139689701, "learning_rate": 0.003, "loss": 4.0947, "step": 9773 }, { "epoch": 0.09774, "grad_norm": 1.379965710605147, "learning_rate": 0.003, "loss": 4.1208, "step": 9774 }, { "epoch": 0.09775, "grad_norm": 0.743770947000131, "learning_rate": 0.003, "loss": 4.1085, "step": 9775 }, { "epoch": 0.09776, "grad_norm": 0.7186119882928109, "learning_rate": 0.003, "loss": 4.0797, "step": 9776 }, { "epoch": 0.09777, "grad_norm": 0.7494079596750043, "learning_rate": 0.003, "loss": 4.107, "step": 9777 }, { "epoch": 0.09778, "grad_norm": 0.8250803113950317, "learning_rate": 0.003, "loss": 4.0684, "step": 9778 }, { "epoch": 0.09779, "grad_norm": 0.9604869205387597, "learning_rate": 0.003, "loss": 4.0867, "step": 9779 }, { "epoch": 0.0978, "grad_norm": 1.2294622769995196, "learning_rate": 0.003, "loss": 4.0914, "step": 9780 }, { "epoch": 0.09781, "grad_norm": 0.8713194051271618, "learning_rate": 0.003, "loss": 4.0985, "step": 9781 }, { "epoch": 0.09782, "grad_norm": 0.8347924061909529, "learning_rate": 0.003, "loss": 4.0967, "step": 9782 }, { "epoch": 0.09783, "grad_norm": 1.1395358663076178, "learning_rate": 0.003, "loss": 4.0747, "step": 9783 }, { "epoch": 0.09784, "grad_norm": 1.1019088831778014, "learning_rate": 0.003, "loss": 4.0549, "step": 9784 }, { "epoch": 0.09785, "grad_norm": 1.169417153963683, "learning_rate": 0.003, "loss": 4.094, "step": 9785 }, { "epoch": 0.09786, "grad_norm": 1.0034794245848353, "learning_rate": 0.003, "loss": 4.0837, "step": 9786 }, { "epoch": 0.09787, "grad_norm": 1.0284399680460898, "learning_rate": 0.003, "loss": 4.0807, "step": 9787 }, { "epoch": 0.09788, "grad_norm": 0.9057070287775122, "learning_rate": 0.003, "loss": 4.1, "step": 9788 }, { "epoch": 0.09789, "grad_norm": 0.8560028986393307, "learning_rate": 0.003, "loss": 4.0684, "step": 9789 }, { "epoch": 0.0979, "grad_norm": 0.9296582713729994, "learning_rate": 0.003, "loss": 4.0996, "step": 9790 }, { "epoch": 0.09791, "grad_norm": 1.1827366857116655, "learning_rate": 0.003, "loss": 4.1113, "step": 9791 }, { "epoch": 0.09792, "grad_norm": 1.0204010831990022, "learning_rate": 0.003, "loss": 4.0785, "step": 9792 }, { "epoch": 0.09793, "grad_norm": 0.9153519628354887, "learning_rate": 0.003, "loss": 4.0859, "step": 9793 }, { "epoch": 0.09794, "grad_norm": 0.9068739502438202, "learning_rate": 0.003, "loss": 4.1016, "step": 9794 }, { "epoch": 0.09795, "grad_norm": 0.9496301652018498, "learning_rate": 0.003, "loss": 4.1131, "step": 9795 }, { "epoch": 0.09796, "grad_norm": 0.9958118680271525, "learning_rate": 0.003, "loss": 4.0896, "step": 9796 }, { "epoch": 0.09797, "grad_norm": 0.9557782092056588, "learning_rate": 0.003, "loss": 4.094, "step": 9797 }, { "epoch": 0.09798, "grad_norm": 0.8533142469787229, "learning_rate": 0.003, "loss": 4.0878, "step": 9798 }, { "epoch": 0.09799, "grad_norm": 0.8190713601356191, "learning_rate": 0.003, "loss": 4.12, "step": 9799 }, { "epoch": 0.098, "grad_norm": 0.9429777135714619, "learning_rate": 0.003, "loss": 4.0969, "step": 9800 }, { "epoch": 0.09801, "grad_norm": 1.0012945094084325, "learning_rate": 0.003, "loss": 4.1072, "step": 9801 }, { "epoch": 0.09802, "grad_norm": 0.9521250021764239, "learning_rate": 0.003, "loss": 4.0886, "step": 9802 }, { "epoch": 0.09803, "grad_norm": 1.2251473492074867, "learning_rate": 0.003, "loss": 4.0867, "step": 9803 }, { "epoch": 0.09804, "grad_norm": 0.9134016069934916, "learning_rate": 0.003, "loss": 4.0863, "step": 9804 }, { "epoch": 0.09805, "grad_norm": 0.8286943653648622, "learning_rate": 0.003, "loss": 4.0772, "step": 9805 }, { "epoch": 0.09806, "grad_norm": 0.7519693958000754, "learning_rate": 0.003, "loss": 4.069, "step": 9806 }, { "epoch": 0.09807, "grad_norm": 0.7722973199567947, "learning_rate": 0.003, "loss": 4.0838, "step": 9807 }, { "epoch": 0.09808, "grad_norm": 0.74781146199721, "learning_rate": 0.003, "loss": 4.1142, "step": 9808 }, { "epoch": 0.09809, "grad_norm": 0.7364400647725448, "learning_rate": 0.003, "loss": 4.0822, "step": 9809 }, { "epoch": 0.0981, "grad_norm": 0.7200212527489623, "learning_rate": 0.003, "loss": 4.1092, "step": 9810 }, { "epoch": 0.09811, "grad_norm": 0.7544356048465244, "learning_rate": 0.003, "loss": 4.1094, "step": 9811 }, { "epoch": 0.09812, "grad_norm": 0.7686580462245453, "learning_rate": 0.003, "loss": 4.0786, "step": 9812 }, { "epoch": 0.09813, "grad_norm": 0.984346883906706, "learning_rate": 0.003, "loss": 4.0965, "step": 9813 }, { "epoch": 0.09814, "grad_norm": 1.2700083355049891, "learning_rate": 0.003, "loss": 4.0855, "step": 9814 }, { "epoch": 0.09815, "grad_norm": 0.9442514607634342, "learning_rate": 0.003, "loss": 4.0501, "step": 9815 }, { "epoch": 0.09816, "grad_norm": 1.0077025937052435, "learning_rate": 0.003, "loss": 4.0612, "step": 9816 }, { "epoch": 0.09817, "grad_norm": 0.971550909174614, "learning_rate": 0.003, "loss": 4.0799, "step": 9817 }, { "epoch": 0.09818, "grad_norm": 1.0193923220149106, "learning_rate": 0.003, "loss": 4.1056, "step": 9818 }, { "epoch": 0.09819, "grad_norm": 1.3161986032712076, "learning_rate": 0.003, "loss": 4.1133, "step": 9819 }, { "epoch": 0.0982, "grad_norm": 0.8557332761380659, "learning_rate": 0.003, "loss": 4.0889, "step": 9820 }, { "epoch": 0.09821, "grad_norm": 0.7233644886267359, "learning_rate": 0.003, "loss": 4.0773, "step": 9821 }, { "epoch": 0.09822, "grad_norm": 0.8326436608697041, "learning_rate": 0.003, "loss": 4.1006, "step": 9822 }, { "epoch": 0.09823, "grad_norm": 0.8980854204875377, "learning_rate": 0.003, "loss": 4.1099, "step": 9823 }, { "epoch": 0.09824, "grad_norm": 1.2283773516871788, "learning_rate": 0.003, "loss": 4.105, "step": 9824 }, { "epoch": 0.09825, "grad_norm": 0.7785325214207814, "learning_rate": 0.003, "loss": 4.0837, "step": 9825 }, { "epoch": 0.09826, "grad_norm": 0.597043372236362, "learning_rate": 0.003, "loss": 4.0634, "step": 9826 }, { "epoch": 0.09827, "grad_norm": 0.6464758597311613, "learning_rate": 0.003, "loss": 4.0684, "step": 9827 }, { "epoch": 0.09828, "grad_norm": 0.8139034532487643, "learning_rate": 0.003, "loss": 4.0877, "step": 9828 }, { "epoch": 0.09829, "grad_norm": 1.244278860848288, "learning_rate": 0.003, "loss": 4.0872, "step": 9829 }, { "epoch": 0.0983, "grad_norm": 0.856108216172038, "learning_rate": 0.003, "loss": 4.0863, "step": 9830 }, { "epoch": 0.09831, "grad_norm": 0.6433844409257128, "learning_rate": 0.003, "loss": 4.0957, "step": 9831 }, { "epoch": 0.09832, "grad_norm": 0.7326548261726062, "learning_rate": 0.003, "loss": 4.0585, "step": 9832 }, { "epoch": 0.09833, "grad_norm": 0.7997173970955105, "learning_rate": 0.003, "loss": 4.1088, "step": 9833 }, { "epoch": 0.09834, "grad_norm": 0.9405671127384347, "learning_rate": 0.003, "loss": 4.0984, "step": 9834 }, { "epoch": 0.09835, "grad_norm": 1.131659672629919, "learning_rate": 0.003, "loss": 4.0995, "step": 9835 }, { "epoch": 0.09836, "grad_norm": 0.997538927089926, "learning_rate": 0.003, "loss": 4.0604, "step": 9836 }, { "epoch": 0.09837, "grad_norm": 1.0175411430188779, "learning_rate": 0.003, "loss": 4.0584, "step": 9837 }, { "epoch": 0.09838, "grad_norm": 0.9799166460402474, "learning_rate": 0.003, "loss": 4.085, "step": 9838 }, { "epoch": 0.09839, "grad_norm": 1.0396590201522609, "learning_rate": 0.003, "loss": 4.1259, "step": 9839 }, { "epoch": 0.0984, "grad_norm": 1.1282086069881085, "learning_rate": 0.003, "loss": 4.0965, "step": 9840 }, { "epoch": 0.09841, "grad_norm": 0.8198231903899644, "learning_rate": 0.003, "loss": 4.1053, "step": 9841 }, { "epoch": 0.09842, "grad_norm": 0.9217921401090745, "learning_rate": 0.003, "loss": 4.0831, "step": 9842 }, { "epoch": 0.09843, "grad_norm": 1.0197186165919299, "learning_rate": 0.003, "loss": 4.0887, "step": 9843 }, { "epoch": 0.09844, "grad_norm": 1.2101350812829232, "learning_rate": 0.003, "loss": 4.0737, "step": 9844 }, { "epoch": 0.09845, "grad_norm": 0.8651149977324143, "learning_rate": 0.003, "loss": 4.0875, "step": 9845 }, { "epoch": 0.09846, "grad_norm": 0.7265952518823648, "learning_rate": 0.003, "loss": 4.0697, "step": 9846 }, { "epoch": 0.09847, "grad_norm": 0.6887147254332203, "learning_rate": 0.003, "loss": 4.0926, "step": 9847 }, { "epoch": 0.09848, "grad_norm": 0.7507853589418222, "learning_rate": 0.003, "loss": 4.0704, "step": 9848 }, { "epoch": 0.09849, "grad_norm": 1.0036628039255613, "learning_rate": 0.003, "loss": 4.0831, "step": 9849 }, { "epoch": 0.0985, "grad_norm": 1.2748454320535545, "learning_rate": 0.003, "loss": 4.1194, "step": 9850 }, { "epoch": 0.09851, "grad_norm": 0.8314804970803586, "learning_rate": 0.003, "loss": 4.1185, "step": 9851 }, { "epoch": 0.09852, "grad_norm": 0.817675377067337, "learning_rate": 0.003, "loss": 4.0897, "step": 9852 }, { "epoch": 0.09853, "grad_norm": 0.7841091435659291, "learning_rate": 0.003, "loss": 4.0832, "step": 9853 }, { "epoch": 0.09854, "grad_norm": 0.6372546033409912, "learning_rate": 0.003, "loss": 4.0618, "step": 9854 }, { "epoch": 0.09855, "grad_norm": 0.679618174317228, "learning_rate": 0.003, "loss": 4.0498, "step": 9855 }, { "epoch": 0.09856, "grad_norm": 0.7500362277289165, "learning_rate": 0.003, "loss": 4.11, "step": 9856 }, { "epoch": 0.09857, "grad_norm": 0.8068873392409563, "learning_rate": 0.003, "loss": 4.0809, "step": 9857 }, { "epoch": 0.09858, "grad_norm": 0.7652166764948078, "learning_rate": 0.003, "loss": 4.0752, "step": 9858 }, { "epoch": 0.09859, "grad_norm": 0.8218029235936964, "learning_rate": 0.003, "loss": 4.1203, "step": 9859 }, { "epoch": 0.0986, "grad_norm": 0.843394170131785, "learning_rate": 0.003, "loss": 4.0777, "step": 9860 }, { "epoch": 0.09861, "grad_norm": 0.9379505462471198, "learning_rate": 0.003, "loss": 4.0881, "step": 9861 }, { "epoch": 0.09862, "grad_norm": 1.32072231867818, "learning_rate": 0.003, "loss": 4.112, "step": 9862 }, { "epoch": 0.09863, "grad_norm": 1.0523750210753542, "learning_rate": 0.003, "loss": 4.1123, "step": 9863 }, { "epoch": 0.09864, "grad_norm": 0.9406173426354154, "learning_rate": 0.003, "loss": 4.1149, "step": 9864 }, { "epoch": 0.09865, "grad_norm": 0.8931432490745203, "learning_rate": 0.003, "loss": 4.093, "step": 9865 }, { "epoch": 0.09866, "grad_norm": 0.9699347096958343, "learning_rate": 0.003, "loss": 4.0825, "step": 9866 }, { "epoch": 0.09867, "grad_norm": 1.046711651416207, "learning_rate": 0.003, "loss": 4.0697, "step": 9867 }, { "epoch": 0.09868, "grad_norm": 1.0808042490594871, "learning_rate": 0.003, "loss": 4.0806, "step": 9868 }, { "epoch": 0.09869, "grad_norm": 1.1961822678848502, "learning_rate": 0.003, "loss": 4.0827, "step": 9869 }, { "epoch": 0.0987, "grad_norm": 0.9228079748238792, "learning_rate": 0.003, "loss": 4.0717, "step": 9870 }, { "epoch": 0.09871, "grad_norm": 0.8545983336178743, "learning_rate": 0.003, "loss": 4.1474, "step": 9871 }, { "epoch": 0.09872, "grad_norm": 0.8784832785791439, "learning_rate": 0.003, "loss": 4.1026, "step": 9872 }, { "epoch": 0.09873, "grad_norm": 1.070280498696221, "learning_rate": 0.003, "loss": 4.0946, "step": 9873 }, { "epoch": 0.09874, "grad_norm": 1.0439215572833065, "learning_rate": 0.003, "loss": 4.1139, "step": 9874 }, { "epoch": 0.09875, "grad_norm": 1.0814551372745504, "learning_rate": 0.003, "loss": 4.0915, "step": 9875 }, { "epoch": 0.09876, "grad_norm": 0.8511905866205883, "learning_rate": 0.003, "loss": 4.0915, "step": 9876 }, { "epoch": 0.09877, "grad_norm": 0.811275306253899, "learning_rate": 0.003, "loss": 4.0817, "step": 9877 }, { "epoch": 0.09878, "grad_norm": 1.249209967465114, "learning_rate": 0.003, "loss": 4.0752, "step": 9878 }, { "epoch": 0.09879, "grad_norm": 0.9304246999674388, "learning_rate": 0.003, "loss": 4.0764, "step": 9879 }, { "epoch": 0.0988, "grad_norm": 0.9414369214716679, "learning_rate": 0.003, "loss": 4.069, "step": 9880 }, { "epoch": 0.09881, "grad_norm": 1.0878622028222777, "learning_rate": 0.003, "loss": 4.0834, "step": 9881 }, { "epoch": 0.09882, "grad_norm": 0.9240651030772346, "learning_rate": 0.003, "loss": 4.1051, "step": 9882 }, { "epoch": 0.09883, "grad_norm": 0.7991058410005727, "learning_rate": 0.003, "loss": 4.0838, "step": 9883 }, { "epoch": 0.09884, "grad_norm": 0.9050441156700046, "learning_rate": 0.003, "loss": 4.0734, "step": 9884 }, { "epoch": 0.09885, "grad_norm": 1.1399203818579307, "learning_rate": 0.003, "loss": 4.0881, "step": 9885 }, { "epoch": 0.09886, "grad_norm": 0.8766884786296881, "learning_rate": 0.003, "loss": 4.0735, "step": 9886 }, { "epoch": 0.09887, "grad_norm": 0.801700624949135, "learning_rate": 0.003, "loss": 4.1047, "step": 9887 }, { "epoch": 0.09888, "grad_norm": 0.8628961038287333, "learning_rate": 0.003, "loss": 4.1255, "step": 9888 }, { "epoch": 0.09889, "grad_norm": 0.7467079097142902, "learning_rate": 0.003, "loss": 4.071, "step": 9889 }, { "epoch": 0.0989, "grad_norm": 0.7726770182387934, "learning_rate": 0.003, "loss": 4.0924, "step": 9890 }, { "epoch": 0.09891, "grad_norm": 0.9462651212194271, "learning_rate": 0.003, "loss": 4.1005, "step": 9891 }, { "epoch": 0.09892, "grad_norm": 1.139679261250144, "learning_rate": 0.003, "loss": 4.0649, "step": 9892 }, { "epoch": 0.09893, "grad_norm": 1.0951119397642, "learning_rate": 0.003, "loss": 4.0947, "step": 9893 }, { "epoch": 0.09894, "grad_norm": 0.905062924614346, "learning_rate": 0.003, "loss": 4.1049, "step": 9894 }, { "epoch": 0.09895, "grad_norm": 0.7933902578399411, "learning_rate": 0.003, "loss": 4.1002, "step": 9895 }, { "epoch": 0.09896, "grad_norm": 0.704722792642583, "learning_rate": 0.003, "loss": 4.0674, "step": 9896 }, { "epoch": 0.09897, "grad_norm": 0.6940718562411224, "learning_rate": 0.003, "loss": 4.0802, "step": 9897 }, { "epoch": 0.09898, "grad_norm": 0.8845748690268396, "learning_rate": 0.003, "loss": 4.0794, "step": 9898 }, { "epoch": 0.09899, "grad_norm": 1.0988377109535674, "learning_rate": 0.003, "loss": 4.0833, "step": 9899 }, { "epoch": 0.099, "grad_norm": 1.1105673173122586, "learning_rate": 0.003, "loss": 4.1227, "step": 9900 }, { "epoch": 0.09901, "grad_norm": 0.9810554294045679, "learning_rate": 0.003, "loss": 4.1031, "step": 9901 }, { "epoch": 0.09902, "grad_norm": 1.088761843451906, "learning_rate": 0.003, "loss": 4.1059, "step": 9902 }, { "epoch": 0.09903, "grad_norm": 0.8865852858603028, "learning_rate": 0.003, "loss": 4.0866, "step": 9903 }, { "epoch": 0.09904, "grad_norm": 1.043616089079261, "learning_rate": 0.003, "loss": 4.0747, "step": 9904 }, { "epoch": 0.09905, "grad_norm": 1.068953951671368, "learning_rate": 0.003, "loss": 4.0947, "step": 9905 }, { "epoch": 0.09906, "grad_norm": 1.044686025986228, "learning_rate": 0.003, "loss": 4.1099, "step": 9906 }, { "epoch": 0.09907, "grad_norm": 0.8886077091593347, "learning_rate": 0.003, "loss": 4.0591, "step": 9907 }, { "epoch": 0.09908, "grad_norm": 0.8555950937756621, "learning_rate": 0.003, "loss": 4.1026, "step": 9908 }, { "epoch": 0.09909, "grad_norm": 0.7817570855188947, "learning_rate": 0.003, "loss": 4.0984, "step": 9909 }, { "epoch": 0.0991, "grad_norm": 0.8362250403473259, "learning_rate": 0.003, "loss": 4.0942, "step": 9910 }, { "epoch": 0.09911, "grad_norm": 0.9500328133313455, "learning_rate": 0.003, "loss": 4.1008, "step": 9911 }, { "epoch": 0.09912, "grad_norm": 1.031805969191571, "learning_rate": 0.003, "loss": 4.0875, "step": 9912 }, { "epoch": 0.09913, "grad_norm": 1.0138965400557471, "learning_rate": 0.003, "loss": 4.1154, "step": 9913 }, { "epoch": 0.09914, "grad_norm": 0.9678001875339941, "learning_rate": 0.003, "loss": 4.0724, "step": 9914 }, { "epoch": 0.09915, "grad_norm": 0.9475987221695131, "learning_rate": 0.003, "loss": 4.0832, "step": 9915 }, { "epoch": 0.09916, "grad_norm": 0.9451880390051262, "learning_rate": 0.003, "loss": 4.1057, "step": 9916 }, { "epoch": 0.09917, "grad_norm": 0.9072128377197956, "learning_rate": 0.003, "loss": 4.108, "step": 9917 }, { "epoch": 0.09918, "grad_norm": 0.9058874365902825, "learning_rate": 0.003, "loss": 4.0683, "step": 9918 }, { "epoch": 0.09919, "grad_norm": 0.9988923576329186, "learning_rate": 0.003, "loss": 4.122, "step": 9919 }, { "epoch": 0.0992, "grad_norm": 0.972034979186135, "learning_rate": 0.003, "loss": 4.1062, "step": 9920 }, { "epoch": 0.09921, "grad_norm": 0.9407505238435098, "learning_rate": 0.003, "loss": 4.0559, "step": 9921 }, { "epoch": 0.09922, "grad_norm": 1.0709663672999241, "learning_rate": 0.003, "loss": 4.1022, "step": 9922 }, { "epoch": 0.09923, "grad_norm": 1.011390937859059, "learning_rate": 0.003, "loss": 4.1031, "step": 9923 }, { "epoch": 0.09924, "grad_norm": 1.0734808599050338, "learning_rate": 0.003, "loss": 4.0633, "step": 9924 }, { "epoch": 0.09925, "grad_norm": 0.9724775683011624, "learning_rate": 0.003, "loss": 4.0693, "step": 9925 }, { "epoch": 0.09926, "grad_norm": 0.9858754523172633, "learning_rate": 0.003, "loss": 4.097, "step": 9926 }, { "epoch": 0.09927, "grad_norm": 1.0026350425415425, "learning_rate": 0.003, "loss": 4.1296, "step": 9927 }, { "epoch": 0.09928, "grad_norm": 0.9618825289266347, "learning_rate": 0.003, "loss": 4.085, "step": 9928 }, { "epoch": 0.09929, "grad_norm": 0.8720935243540162, "learning_rate": 0.003, "loss": 4.0939, "step": 9929 }, { "epoch": 0.0993, "grad_norm": 0.8729170421206854, "learning_rate": 0.003, "loss": 4.0792, "step": 9930 }, { "epoch": 0.09931, "grad_norm": 0.9387248454992504, "learning_rate": 0.003, "loss": 4.0886, "step": 9931 }, { "epoch": 0.09932, "grad_norm": 1.0565356457114414, "learning_rate": 0.003, "loss": 4.0997, "step": 9932 }, { "epoch": 0.09933, "grad_norm": 0.8625813163715004, "learning_rate": 0.003, "loss": 4.0926, "step": 9933 }, { "epoch": 0.09934, "grad_norm": 0.7002720895141619, "learning_rate": 0.003, "loss": 4.0966, "step": 9934 }, { "epoch": 0.09935, "grad_norm": 0.7613232133923477, "learning_rate": 0.003, "loss": 4.0723, "step": 9935 }, { "epoch": 0.09936, "grad_norm": 1.0484981670115654, "learning_rate": 0.003, "loss": 4.0926, "step": 9936 }, { "epoch": 0.09937, "grad_norm": 1.2541580708606934, "learning_rate": 0.003, "loss": 4.064, "step": 9937 }, { "epoch": 0.09938, "grad_norm": 0.7679423053752109, "learning_rate": 0.003, "loss": 4.0958, "step": 9938 }, { "epoch": 0.09939, "grad_norm": 0.7623951555699858, "learning_rate": 0.003, "loss": 4.0961, "step": 9939 }, { "epoch": 0.0994, "grad_norm": 0.743467827926508, "learning_rate": 0.003, "loss": 4.1149, "step": 9940 }, { "epoch": 0.09941, "grad_norm": 0.8158817381706474, "learning_rate": 0.003, "loss": 4.0846, "step": 9941 }, { "epoch": 0.09942, "grad_norm": 0.9477928068113599, "learning_rate": 0.003, "loss": 4.087, "step": 9942 }, { "epoch": 0.09943, "grad_norm": 0.8989443009796476, "learning_rate": 0.003, "loss": 4.0361, "step": 9943 }, { "epoch": 0.09944, "grad_norm": 0.8465160184457518, "learning_rate": 0.003, "loss": 4.1195, "step": 9944 }, { "epoch": 0.09945, "grad_norm": 0.96077143421336, "learning_rate": 0.003, "loss": 4.0574, "step": 9945 }, { "epoch": 0.09946, "grad_norm": 0.982634646562586, "learning_rate": 0.003, "loss": 4.1104, "step": 9946 }, { "epoch": 0.09947, "grad_norm": 0.9829649395908524, "learning_rate": 0.003, "loss": 4.1031, "step": 9947 }, { "epoch": 0.09948, "grad_norm": 1.0904657767502883, "learning_rate": 0.003, "loss": 4.1043, "step": 9948 }, { "epoch": 0.09949, "grad_norm": 0.958754249548164, "learning_rate": 0.003, "loss": 4.1023, "step": 9949 }, { "epoch": 0.0995, "grad_norm": 0.941102523885678, "learning_rate": 0.003, "loss": 4.0933, "step": 9950 }, { "epoch": 0.09951, "grad_norm": 0.9547561697534392, "learning_rate": 0.003, "loss": 4.0716, "step": 9951 }, { "epoch": 0.09952, "grad_norm": 1.0369210267712394, "learning_rate": 0.003, "loss": 4.0678, "step": 9952 }, { "epoch": 0.09953, "grad_norm": 1.1666110255591098, "learning_rate": 0.003, "loss": 4.118, "step": 9953 }, { "epoch": 0.09954, "grad_norm": 0.8404966208187018, "learning_rate": 0.003, "loss": 4.0673, "step": 9954 }, { "epoch": 0.09955, "grad_norm": 0.6348854161168744, "learning_rate": 0.003, "loss": 4.0669, "step": 9955 }, { "epoch": 0.09956, "grad_norm": 0.6821965250062815, "learning_rate": 0.003, "loss": 4.0788, "step": 9956 }, { "epoch": 0.09957, "grad_norm": 0.6331997868974779, "learning_rate": 0.003, "loss": 4.086, "step": 9957 }, { "epoch": 0.09958, "grad_norm": 0.777484394684235, "learning_rate": 0.003, "loss": 4.1028, "step": 9958 }, { "epoch": 0.09959, "grad_norm": 1.0101723616966292, "learning_rate": 0.003, "loss": 4.1088, "step": 9959 }, { "epoch": 0.0996, "grad_norm": 1.2396789474457461, "learning_rate": 0.003, "loss": 4.1022, "step": 9960 }, { "epoch": 0.09961, "grad_norm": 0.883683117211385, "learning_rate": 0.003, "loss": 4.0983, "step": 9961 }, { "epoch": 0.09962, "grad_norm": 0.9018658759893716, "learning_rate": 0.003, "loss": 4.1046, "step": 9962 }, { "epoch": 0.09963, "grad_norm": 1.0876146413308398, "learning_rate": 0.003, "loss": 4.1091, "step": 9963 }, { "epoch": 0.09964, "grad_norm": 1.0287603306521411, "learning_rate": 0.003, "loss": 4.0756, "step": 9964 }, { "epoch": 0.09965, "grad_norm": 1.0182673045567394, "learning_rate": 0.003, "loss": 4.0825, "step": 9965 }, { "epoch": 0.09966, "grad_norm": 1.1527631254244322, "learning_rate": 0.003, "loss": 4.1112, "step": 9966 }, { "epoch": 0.09967, "grad_norm": 0.8709975266827947, "learning_rate": 0.003, "loss": 4.0987, "step": 9967 }, { "epoch": 0.09968, "grad_norm": 0.8331778651716775, "learning_rate": 0.003, "loss": 4.0875, "step": 9968 }, { "epoch": 0.09969, "grad_norm": 0.9011465361487814, "learning_rate": 0.003, "loss": 4.0948, "step": 9969 }, { "epoch": 0.0997, "grad_norm": 1.0080570307471883, "learning_rate": 0.003, "loss": 4.086, "step": 9970 }, { "epoch": 0.09971, "grad_norm": 1.1140173135877225, "learning_rate": 0.003, "loss": 4.0976, "step": 9971 }, { "epoch": 0.09972, "grad_norm": 0.9393638818310338, "learning_rate": 0.003, "loss": 4.0819, "step": 9972 }, { "epoch": 0.09973, "grad_norm": 1.0649082720147478, "learning_rate": 0.003, "loss": 4.0968, "step": 9973 }, { "epoch": 0.09974, "grad_norm": 1.130092450772193, "learning_rate": 0.003, "loss": 4.0932, "step": 9974 }, { "epoch": 0.09975, "grad_norm": 0.88585944847689, "learning_rate": 0.003, "loss": 4.099, "step": 9975 }, { "epoch": 0.09976, "grad_norm": 0.857801840723658, "learning_rate": 0.003, "loss": 4.0739, "step": 9976 }, { "epoch": 0.09977, "grad_norm": 0.8296564805689629, "learning_rate": 0.003, "loss": 4.0656, "step": 9977 }, { "epoch": 0.09978, "grad_norm": 0.8072297066940064, "learning_rate": 0.003, "loss": 4.0714, "step": 9978 }, { "epoch": 0.09979, "grad_norm": 0.8227232337051928, "learning_rate": 0.003, "loss": 4.09, "step": 9979 }, { "epoch": 0.0998, "grad_norm": 0.8476832328670773, "learning_rate": 0.003, "loss": 4.0842, "step": 9980 }, { "epoch": 0.09981, "grad_norm": 0.9357206908798009, "learning_rate": 0.003, "loss": 4.0903, "step": 9981 }, { "epoch": 0.09982, "grad_norm": 1.143225588531109, "learning_rate": 0.003, "loss": 4.099, "step": 9982 }, { "epoch": 0.09983, "grad_norm": 0.8237550880806321, "learning_rate": 0.003, "loss": 4.0823, "step": 9983 }, { "epoch": 0.09984, "grad_norm": 0.7811421447324085, "learning_rate": 0.003, "loss": 4.1046, "step": 9984 }, { "epoch": 0.09985, "grad_norm": 0.8438712038256704, "learning_rate": 0.003, "loss": 4.0998, "step": 9985 }, { "epoch": 0.09986, "grad_norm": 0.8254292728857884, "learning_rate": 0.003, "loss": 4.1023, "step": 9986 }, { "epoch": 0.09987, "grad_norm": 0.8131007944980652, "learning_rate": 0.003, "loss": 4.0731, "step": 9987 }, { "epoch": 0.09988, "grad_norm": 0.8584836605861299, "learning_rate": 0.003, "loss": 4.1098, "step": 9988 }, { "epoch": 0.09989, "grad_norm": 1.089542214562673, "learning_rate": 0.003, "loss": 4.0835, "step": 9989 }, { "epoch": 0.0999, "grad_norm": 1.2008176887832276, "learning_rate": 0.003, "loss": 4.0943, "step": 9990 }, { "epoch": 0.09991, "grad_norm": 0.9071696842404575, "learning_rate": 0.003, "loss": 4.1082, "step": 9991 }, { "epoch": 0.09992, "grad_norm": 0.9778468657106637, "learning_rate": 0.003, "loss": 4.0711, "step": 9992 }, { "epoch": 0.09993, "grad_norm": 1.018266367771181, "learning_rate": 0.003, "loss": 4.095, "step": 9993 }, { "epoch": 0.09994, "grad_norm": 0.9996060334571353, "learning_rate": 0.003, "loss": 4.1086, "step": 9994 }, { "epoch": 0.09995, "grad_norm": 0.8639535717682494, "learning_rate": 0.003, "loss": 4.1042, "step": 9995 }, { "epoch": 0.09996, "grad_norm": 0.752574714119443, "learning_rate": 0.003, "loss": 4.0829, "step": 9996 }, { "epoch": 0.09997, "grad_norm": 0.7765764977536432, "learning_rate": 0.003, "loss": 4.0895, "step": 9997 }, { "epoch": 0.09998, "grad_norm": 0.8178576426181308, "learning_rate": 0.003, "loss": 4.0958, "step": 9998 }, { "epoch": 0.09999, "grad_norm": 1.007703696887012, "learning_rate": 0.003, "loss": 4.0974, "step": 9999 }, { "epoch": 0.1, "grad_norm": 1.2762926674519628, "learning_rate": 0.003, "loss": 4.1068, "step": 10000 } ], "logging_steps": 1, "max_steps": 100000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.9643642855424e+17, "train_batch_size": 1024, "trial_name": null, "trial_params": null }